diff --git a/Extras/simdmathlibrary/LICENSE b/Extras/simdmathlibrary/LICENSE new file mode 100644 index 000000000..3ceee30c6 --- /dev/null +++ b/Extras/simdmathlibrary/LICENSE @@ -0,0 +1,28 @@ +/* SIMD math library functions for both the PowerPC (PPU) and the SPU. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ diff --git a/Extras/simdmathlibrary/Makefile b/Extras/simdmathlibrary/Makefile new file mode 100644 index 000000000..f08e28d94 --- /dev/null +++ b/Extras/simdmathlibrary/Makefile @@ -0,0 +1,120 @@ +# Toplevel make file to build the libsimdmath library for both SPU and PPU +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +# How to build: +# +# To build 32 bit libraries: +# +# make ARCH_PPU=32 +# +# To use "gcc" instead of "ppu-gcc". 
+# +# make CROSS_PPU= + +prefix = /usr +DESTDIR = + +ARCH_PPU = 64 +CROSS_PPU = ppu- +AR_PPU = $(CROSS_PPU)ar +CC_PPU = $(CROSS_PPU)gcc +CXX_PPU = $(CROSS_PPU)g++ +RANLIB_PPU = $(CROSS_PPU)ranlib +TEST_CMD_PPU = + +ARCH_CFLAGS_PPU = -m$(ARCH_PPU) -maltivec -mabi=altivec + +CROSS_SPU = spu- +AR_SPU = $(CROSS_SPU)ar +CC_SPU = $(CROSS_SPU)gcc +CXX_SPU = $(CROSS_SPU)g++ +RANLIB_SPU = $(CROSS_SPU)ranlib +TEST_CMD_SPU = + +INSTALL = install + +MAKE_DEFS = \ + prefix='$(prefix)' \ + DESTDIR='$(DESTDIR)' \ + LIB_MAJOR_VERSION='$(LIB_MAJOR_VERSION)' \ + LIB_MINOR_VERSION='$(LIB_MINOR_VERSION)' \ + LIB_BASE='$(LIB_BASE)' \ + LIB_NAME='$(LIB_NAME)' \ + STATIC_LIB='$(STATIC_LIB)' \ + SHARED_LIB='$(SHARED_LIB)' \ + ARCH_PPU='$(ARCH_PPU)' \ + ARCH_CFLAGS_PPU='$(ARCH_CFLAGS_PPU)' \ + CROSS_PPU='$(CROSS_PPU)' \ + AR_PPU='$(AR_PPU)' \ + CC_PPU='$(CC_PPU)' \ + CXX_PPU='$(CXX_PPU)' \ + RANLIB_PPU='$(RANLIB_PPU)' \ + TEST_CMD_PPU='$(TEST_CMD_PPU)' \ + CROSS_SPU='$(CROSS_SPU)' \ + AR_SPU='$(AR_SPU)' \ + CC_SPU='$(CC_SPU)' \ + CXX_SPU='$(CXX_SPU)' \ + RANLIB_SPU='$(RANLIB_SPU)' \ + TEST_CMD_SPU='$(TEST_CMD_SPU)' \ + INSTALL='$(INSTALL)' + +LIB_MAJOR_VERSION = 1 +LIB_MINOR_VERSION = 0 + +LIB_BASE = simdmath +LIB_NAME = lib$(LIB_BASE) +STATIC_LIB = $(LIB_NAME).a +SHARED_LIB = $(LIB_NAME).so + +all: spu_library ppu_library + +spu_library: + cd spu; $(MAKE) $(MAKE_DEFS) + +ppu_library: + cd ppu; $(MAKE) $(MAKE_DEFS) + +install: spu_install ppu_install + +spu_install: + cd spu; $(MAKE) $(MAKE_DEFS) install + +ppu_install: + cd ppu; $(MAKE) $(MAKE_DEFS) install + +clean: + cd spu; $(MAKE) $(MAKE_DEFS) clean + cd ppu; $(MAKE) $(MAKE_DEFS) clean + +check: check_ppu check_spu + +check_ppu: + cd ppu; $(MAKE) $(MAKE_DEFS) check + +check_spu: + cd spu; $(MAKE) $(MAKE_DEFS) check diff --git a/Extras/simdmathlibrary/ppu/Makefile b/Extras/simdmathlibrary/ppu/Makefile new file mode 100644 index 000000000..ad1f677c4 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/Makefile @@ -0,0 +1,147 @@ +# make file 
to build the libsimdmath library for PPU +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ + + +# All that you do to add a file is edit OBJS, the rest will just work + +prefix = /usr +DESTDIR = + +OBJS = fabsf4.o absi4.o truncf4.o sqrtf4.o tanf4.o \ + negatef4.o fmaf4.o copysignf4.o modff4.o \ + fmaxf4.o fminf4.o fdimf4.o sinf4.o asinf4.o \ + floorf4.o recipf4.o ceilf4.o divf4.o divi4.o \ + cosf4.o hypotf4.o cbrtf4.o logf4.o sincosf4.o \ + rsqrtf4.o log2f4.o ldexpf4.o expf4.o frexpf4.o \ + expm1f4.o logbf4.o log1pf4.o log10f4.o ilogbf4.o \ + fmodf4.o negatei4.o exp2f4.o powf4.o atanf4.o \ + atan2f4.o acosf4.o + +INCLUDES_PPU = -I../ + +ARCH_PPU = 64 +CROSS_PPU = ppu- +AR_PPU = $(CROSS_PPU)ar +CC_PPU = $(CROSS_PPU)gcc +CXX_PPU = $(CROSS_PPU)g++ +RANLIB_PPU = $(CROSS_PPU)ranlib +TEST_CMD_PPU = + +ARCH_CFLAGS_PPU = -m$(ARCH_PPU) -maltivec -mabi=altivec +CFLAGS_PPU=$(INCLUDES_PPU) -O2 -W -Wall -std=gnu99 $(ARCH_CFLAGS_PPU) -fPIC +LDFLAGS_PPU = $(ARCH_CFLAGS_PPU) + +INSTALL = install + +MAKE_DEFS = \ + prefix='$(prefix)' \ + DESTDIR='$(DESTDIR)' \ + LIB_BASE='$(LIB_BASE)' \ + LIB_NAME='$(LIB_NAME)' \ + STATIC_LIB='$(STATIC_LIB)' \ + SHARED_LIB='$(SHARED_LIB)' \ + ARCH_PPU='$(ARCH_PPU)' \ + ARCH_CFLAGS_PPU='$(ARCH_CFLAGS_PPU)' \ + CROSS_PPU='$(CROSS_PPU)' \ + AR_PPU='$(AR_PPU)' \ + CC_PPU='$(CC_PPU)' \ + CXX_PPU='$(CXX_PPU)' \ + RANLIB_PPU='$(RANLIB_PPU)' \ + TEST_CMD_PPU='$(TEST_CMD_PPU)' \ + INSTALL='$(INSTALL)' + +LIB_MAJOR_VERSION = 1 +LIB_MINOR_VERSION = 0 + +LIB_BASE = simdmath +LIB_NAME = lib$(LIB_BASE) +STATIC_LIB = $(LIB_NAME).a +SHARED_LIB = $(LIB_NAME).so +SHARED_LIB_SONAME = $(SHARED_LIB).$(LIB_MAJOR_VERSION) +SHARED_LIB_FULL = $(SHARED_LIB).$(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION) + +ALL_LIBS = $(STATIC_LIB) $(SHARED_LIB) $(SHARED_LIB_FULL) $(SHARED_LIB_SONAME) + +all: $(ALL_LIBS) + +static: $(STATIC_LIB) + +shared: $(SHARED_LIB) + +$(STATIC_LIB): $(OBJS) + $(AR_PPU) cr $@ $(OBJS) + $(RANLIB_PPU) $@ + +$(SHARED_LIB): $(OBJS) + $(CC_PPU) -shared $(OBJS) -o $@ $(LDFLAGS_PPU) -Wl,-h,$(SHARED_LIB_SONAME) + +$(SHARED_LIB_SONAME) $(SHARED_LIB_FULL): 
$(SHARED_LIB) + ln -fs $(SHARED_LIB) $@ + +install: $(ALL_LIBS) + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include + $(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/include/ + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/lib + $(INSTALL) -m 644 $(STATIC_LIB) $(DESTDIR)$(prefix)/lib/$(STATIC_LIB) + $(INSTALL) -m 755 $(SHARED_LIB) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_FULL) + ln -fs $(SHARED_LIB_FULL) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_SONAME) + ln -fs $(SHARED_LIB_SONAME) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB) + +clean: + cd tests; $(MAKE) $(MAKE_DEFS) clean + rm -f $(OBJS) + rm -f $(STATIC_LIB) $(SHARED_LIB) $(SHARED_LIB_SONAME) $(SHARED_LIB_FULL) + +$(OBJS): ../simdmath.h common-types.h + +check: $(ALL_LIBS) + cd tests; $(MAKE) $(MAKE_DEFS) all; $(MAKE) $(MAKE_DEFS) check + + +# Some Objects have special header files. +sinf4.o cosf4.o sincosf4.o tanf4.o: sincos_c.h + + +%.o: %.c + $(CC_PPU) $(CFLAGS_PPU) -c $< + +#---------- +# C++ +#---------- +%.o: %.C + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cpp + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cc + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cxx + $(CXX_PPU) $(CFLAGS_PPU) -c $< diff --git a/Extras/simdmathlibrary/ppu/absi4.c b/Extras/simdmathlibrary/ppu/absi4.c new file mode 100644 index 000000000..dc0d8b474 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/absi4.c @@ -0,0 +1,40 @@ +/* absi4 - for each of four integer slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include + +vector signed int +absi4 (vector signed int x) +{ + return vec_abs( x ); +} + diff --git a/Extras/simdmathlibrary/ppu/acosf4.c b/Extras/simdmathlibrary/ppu/acosf4.c new file mode 100644 index 000000000..88255e50e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/acosf4.c @@ -0,0 +1,79 @@ +/* acosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +// +// Computes the inverse cosine of all four slots of x. +// +vector float +acosf4 (vector float x) +{ + vec_float4 result, xabs; + vec_float4 t1; + vec_float4 xabs2, xabs4; + vec_float4 hi, lo; + vec_float4 neg, pos; + vec_uint4 select; + + xabs = vec_abs(x); + select = (vec_uint4)(vec_sra((vec_int4)(x), ((vec_uint4){31, 31, 31, 31}) )); + + t1 = sqrtf4(vec_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polynomial is evaluated + * in two halves (hi and lo). 
+ */ + xabs2 = vec_madd(xabs, xabs, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + xabs4 = vec_madd(xabs2, xabs2, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + hi = vec_madd( ((vec_float4){-0.0012624911, -0.0012624911, -0.0012624911, -0.0012624911}) , xabs, ((vec_float4){0.0066700901, 0.0066700901, 0.0066700901, 0.0066700901}) ); + hi = vec_madd(hi, xabs, vec_splatsf4(-0.0170881256)); + hi = vec_madd(hi, xabs, vec_splatsf4( 0.0308918810)); + lo = vec_madd(vec_splatsf4(-0.0501743046), xabs, vec_splatsf4(0.0889789874)); + lo = vec_madd(lo, xabs, vec_splatsf4(-0.2145988016)); + lo = vec_madd(lo, xabs, vec_splatsf4( 1.5707963050)); + + result = vec_madd(hi, xabs4, lo); + + /* Adjust the result if x is negative. + */ + neg = vec_nmsub(t1, result, vec_splatsf4(3.1415926535898f)); + pos = vec_madd(t1, result, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + + result = vec_sel(pos, neg, select); + + return result; +} + + diff --git a/Extras/simdmathlibrary/ppu/asinf4.c b/Extras/simdmathlibrary/ppu/asinf4.c new file mode 100644 index 000000000..3bf25ad9f --- /dev/null +++ b/Extras/simdmathlibrary/ppu/asinf4.c @@ -0,0 +1,86 @@ +/* asinf4 - Computes the inverse sine of all four slots of x + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +vector float asinf4 (vector float x) +{ + // positive = (x > 0) + // + vec_uint4 positive = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.0f)); + + // x = absf(x) + // + x = vec_abs(x); + + // gtHalf = (|x| > 0.5) + // + vec_uint4 gtHalf = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.5f)); + + + // if (x > 0.5) + // g = 0.5 - 0.5*x + // x = -2 * sqrtf(g) + // else + // g = x * x + // + vec_float4 g = vec_sel(vec_madd(x,x,vec_splatsf4(0.0f)),vec_madd(vec_splatsf4(-0.5f),x,vec_splatsf4(0.5f)),gtHalf); + + x = vec_sel(x,vec_madd(vec_splatsf4(-2.0f),sqrtf4(g),vec_splatsf4(0.0f)),gtHalf); + + // Compute the polynomials and take their ratio + // denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f + // num = x * g * (-0.504400557f * g + 0.933933258f) + // + vec_float4 denom = vec_add(g,vec_splatsf4(-5.54846723f)); + vec_float4 num = vec_madd(vec_splatsf4(-0.504400557f),g,vec_splatsf4(0.933933258f)); + denom = vec_madd(denom,g,vec_splatsf4(5.603603363f)); + num = vec_madd(vec_madd(x,g,vec_splatsf4(0.0f)),num,vec_splatsf4(0.0f)); + + + // x = x + num / denom + // + x = vec_add(x,divf4(num,denom)); + + // if (x > 0.5) + // x = x + M_PI_2 + // + x = 
vec_sel(x,vec_add(x,vec_splatsf4(1.57079632679489661923f)),gtHalf); + + + // if (!positive) x = -x + // + x = vec_sel((vec_float4)vec_xor(vec_splatsi4(0x80000000),(vec_int4)x),x,positive); + + return x; +} + diff --git a/Extras/simdmathlibrary/ppu/atan2f4.c b/Extras/simdmathlibrary/ppu/atan2f4.c new file mode 100644 index 000000000..72cda6833 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/atan2f4.c @@ -0,0 +1,61 @@ +/* atan2f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "common-types.h" + + +// +// Inverse tangent function of two variables +// +vector float +atan2f4 (vector float y, vector float x) +{ + vec_float4 res = atanf4(divf4(y,x)); + + // Use the arguments to determine the quadrant of the result: + // if (x < 0) + // if (y < 0) + // res = -PI + res + // else + // res = PI + res + // + vec_uint4 yNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,y); + vec_uint4 xNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,x); + + vec_float4 bias = vec_sel(vec_splatsf4(3.14159265358979323846f),vec_splatsf4(-3.14159265358979323846f),yNeg); + + vec_float4 newRes = vec_add(bias, res); + + res = vec_sel(res,newRes,xNeg); + + return res; +} diff --git a/Extras/simdmathlibrary/ppu/atanf4.c b/Extras/simdmathlibrary/ppu/atanf4.c new file mode 100644 index 000000000..5fcea7f11 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/atanf4.c @@ -0,0 +1,83 @@ +/* atanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +// +// Computes the inverse tangent of all four slots of x. +// +vector float +atanf4 (vector float x) +{ + vec_float4 bias; + vec_float4 x2, x3, x4, x8, x9; + vec_float4 hi, lo; + vec_float4 result; + vec_float4 inv_x; + vec_uint4 sign; + vec_uint4 select; + vec_float4 xabs; + vec_float4 vzero = (vec_float4){0.0, 0.0, 0.0, 0.0}; + + sign = vec_and((vec_uint4)x, vec_splatsu4(0x80000000)); + xabs = (vec_float4)vec_andc((vec_uint4)x, vec_splatsu4(0x80000000)); + inv_x = recipf4(x); + inv_x = (vec_float4)vec_xor((vec_uint4)inv_x, vec_splatsu4(0x80000000)); + select = (vec_uint4)vec_cmpgt(xabs, ((vec_float4){1.0, 1.0, 1.0, 1.0}) ); + bias = (vec_float4)vec_or(sign, (vec_uint4)(vec_splatsf4(1.57079632679489661923f))); + bias = (vec_float4)vec_and((vec_uint4)bias, select); + + x = vec_sel(x, inv_x, select); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polynomial is evaluated + * in two halves (hi and lo). 
+ */ + bias = vec_add(bias, x); + x2 = vec_madd(x, x, vzero); + x3 = vec_madd(x2, x, vzero); + x4 = vec_madd(x2, x2, vzero); + x8 = vec_madd(x4, x4, vzero); + x9 = vec_madd(x8, x, vzero); + hi = vec_madd(vec_splatsf4(0.0028662257), x2, vec_splatsf4(-0.0161657367)); + hi = vec_madd(hi, x2, vec_splatsf4(0.0429096138)); + hi = vec_madd(hi, x2, vec_splatsf4(-0.0752896400)); + hi = vec_madd(hi, x2, vec_splatsf4(0.1065626393)); + lo = vec_madd(vec_splatsf4(-0.1420889944), x2, vec_splatsf4(0.1999355085)); + lo = vec_madd(lo, x2, vec_splatsf4(-0.3333314528)); + lo = vec_madd(lo, x3, bias); + + result = vec_madd(hi, x9, lo); + return result; +} + diff --git a/Extras/simdmathlibrary/ppu/cbrtf4.c b/Extras/simdmathlibrary/ppu/cbrtf4.c new file mode 100644 index 000000000..4aa7d6312 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/cbrtf4.c @@ -0,0 +1,103 @@ +/* cbrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +#define __calcQuot(xexp) n = xexp; \ + vec_uint4 negxexpmask = (vec_uint4)vec_cmpgt( ((vec_int4){0, 0, 0, 0}) , n); \ + n = vec_sel(n, vec_add(n, ((vec_int4){2, 2, 2, 2}) ), negxexpmask); \ + \ + quot = vec_add(vec_sra(n, ((vec_uint4){2, 2, 2, 2}) ), vec_sra(n, ((vec_uint4){4, 4, 4, 4}) )); \ + quot = vec_add(quot, vec_sra(quot, ((vec_uint4){4, 4, 4, 4}) )); \ + quot = vec_add(quot, vec_sra(quot, ((vec_uint4){8, 8, 8, 8}) )); \ + quot = vec_add(quot, vec_sra(quot, ((vec_uint4){16, 16, 16, 16}) )); \ + vec_int4 r = vec_sub(vec_sub(n,quot), vec_sl(quot, ((vec_uint4){1, 1, 1, 1}) )); \ + quot = vec_add( \ + quot, \ + vec_sra( \ + vec_add( \ + vec_add(r,((vec_int4){5, 5, 5, 5})), \ + vec_sl (r,((vec_uint4){2, 2, 2, 2})) \ + ), \ + ((vec_uint4){4, 4, 4, 4}) \ + ) \ + ); \ + +#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3) +#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3) + +vector float +cbrtf4 (vector float x) +{ + vec_float4 zeros = (vec_float4){0.0f, 0.0f, 0.0f, 0.0f}; + vec_int4 xexp, n; + vec_float4 sgnmask = (vec_float4)(vec_splatsi4(0x80000000)); + vec_uint4 negmask = (vec_uint4)vec_cmpgt(zeros, x); + x = vec_andc(x, sgnmask); + + x = frexpf4(x, &xexp); + vec_float4 p = vec_madd( + vec_madd(x, vec_splatsf4(-0.191502161678719066f), vec_splatsf4(0.697570460207922770f)), + x, + vec_splatsf4(0.492659620528969547f) + ); + vec_float4 p3 = vec_madd(p, vec_madd(p, p, zeros), 
zeros); + + vec_int4 quot; + __calcQuot(xexp); + vec_int4 modval = vec_sub(vec_sub(xexp,quot), vec_sl(quot,vec_splatsu4(1))); // mod = xexp - 3*quotient + vec_float4 factor = vec_splatsf4(1.0/_CBRTF_H_sqr_cbrt2); + factor = vec_sel(factor, vec_splatsf4(1.0/_CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4(-1))); + factor = vec_sel(factor, vec_splatsf4( 1.0), vec_cmpeq(modval,vec_splatsi4( 0))); + factor = vec_sel(factor, vec_splatsf4( _CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4( 1))); + factor = vec_sel(factor, vec_splatsf4(_CBRTF_H_sqr_cbrt2), vec_cmpeq(modval,vec_splatsi4( 2))); + + vec_float4 pre = vec_madd(p, factor, zeros); + vec_float4 numr = vec_madd(x , vec_splatsf4(2.0f), p3); + vec_float4 denr = vec_madd(p3, vec_splatsf4(2.0f), x ); + vec_float4 res = vec_madd(pre, divf4(numr, denr), zeros); + res = ldexpf4(res, quot); + + return vec_sel(res, vec_or(res,sgnmask), negmask); +} + +/* +_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x)) +{ + vec_uint4 neg = (vec_uint4)vec_cmpgt((vec_float4)(0.0f), x); + vec_float4 sbit = (vec_float4)(vec_float4)((int)0x80000000); + vec_float4 absx = vec_andc(x, sbit); + vec_float4 res = exp2f4(vec_mul((vec_float4)(0.3333333333333f), log2f4(absx))); + res = vec_sel(res, vec_or(sbit, res), neg); + return res; +} +*/ diff --git a/Extras/simdmathlibrary/ppu/ceilf4.c b/Extras/simdmathlibrary/ppu/ceilf4.c new file mode 100644 index 000000000..ebdc03f50 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/ceilf4.c @@ -0,0 +1,39 @@ +/* ceilf4 - for each of four float slots, round up to smallest integer not less than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float +ceilf4 (vector float x) +{ + return vec_ceil( x ); +} + diff --git a/Extras/simdmathlibrary/ppu/common-types.h b/Extras/simdmathlibrary/ppu/common-types.h new file mode 100644 index 000000000..83fc82041 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/common-types.h @@ -0,0 +1,52 @@ +/* Common types for PPU SIMD Math Library + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___COMMON_TYPES_H___ +#define ___COMMON_TYPES_H___ + +typedef vector float vec_float4; +typedef vector signed int vec_int4; +typedef vector unsigned int vec_uint4; + +static inline vec_float4 vec_splatsf4(const float x) +{ + return (vec_float4) {x, x, x, x}; +} + +static inline vec_int4 vec_splatsi4(const signed int x) +{ + return (vec_int4) {x, x, x, x}; +} + +static inline vec_uint4 vec_splatsu4(const unsigned int x) +{ + return (vec_uint4) {x, x, x, x}; +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/copysignf4.c b/Extras/simdmathlibrary/ppu/copysignf4.c new file mode 100644 index 000000000..458b34211 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/copysignf4.c @@ -0,0 +1,41 @@ +/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + + +vector float +copysignf4 (vector float x, vector float y) +{ + return vec_sel( x, y, vec_splatsu4(0x80000000) ); +} + diff --git a/Extras/simdmathlibrary/ppu/cosf4.c b/Extras/simdmathlibrary/ppu/cosf4.c new file mode 100644 index 000000000..8b395fba2 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/cosf4.c @@ -0,0 +1,104 @@ +/* cosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "sincos_c.h" +#include "common-types.h" + + +// +// Computes the cosine of each of the four slots +// by using a polynomial approximation. +// +vector float +cosf4 (vector float x) +{ + vec_float4 xl,xl2,xl3,res; + vec_int4 q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = vec_add(vec_splatsi4(1),vec_and(q,vec_splatsi4((int)0x3))); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); + xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); + vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); + + vec_float4 ct2 = 
vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); + vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); + + vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); + vec_float4 sx = vec_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 mask1 = (vec_uint4)vec_cmpeq(vec_and(offset, + ((vec_int4){0x1, 0x1, 0x1, 0x1})), + ((vec_int4){0, 0, 0, 0})); + res = vec_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_uint4 mask2 = (vec_uint4)vec_cmpeq(vec_and(offset,vec_splatsi4(0x2)),vec_splatsi4((int)0)); + res = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000U),(vec_uint4)res),res,mask2); + + return res; + +} + diff --git a/Extras/simdmathlibrary/ppu/divf4.c b/Extras/simdmathlibrary/ppu/divf4.c new file mode 100644 index 000000000..b4e71cab0 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/divf4.c @@ -0,0 +1,47 @@ +/* divf4 - for each of four float slots, divide numer by denom. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + + +vector float +divf4 (vector float numer, vector float denom) +{ + // Reciprocal estimate and 1 Newton-Raphson iteration. + + vector float y0, y0numer; + + y0 = vec_re( denom ); + y0numer = vec_madd( numer, y0, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + return vec_madd( vec_nmsub( denom, y0, ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f}) ), y0numer, y0numer ); +} + diff --git a/Extras/simdmathlibrary/ppu/divi4.c b/Extras/simdmathlibrary/ppu/divi4.c new file mode 100644 index 000000000..714bb02d9 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/divi4.c @@ -0,0 +1,103 @@ +/* divi4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + + + +// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom +// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator. 
+ +divi4_t +divi4 (vec_int4 numer, vec_int4 denom ) +{ + vec_int4 minusone = vec_splatsi4(-1); + vec_uint4 zero = vec_splatsu4(0); + vec_uint4 one = vec_splatsu4(1); + vec_uint4 k158 = vec_splatsu4(158); + vec_uint4 k23 = vec_splatsu4(23); + + divi4_t res; + vec_uint4 numerPos, denomPos, quotNeg; + vec_uint4 numerAbs, denomAbs; + vec_uint4 denomZeros, numerZeros, shift, denomShifted, oneShifted; + vec_uint4 quot, newQuot, skip, newNum, cont; + int anyCont; + + // determine whether result needs sign change + + numerPos = (vec_uint4)vec_cmpgt( numer, minusone ); + denomPos = (vec_uint4)vec_cmpgt( denom, minusone ); + quotNeg = vec_xor( numerPos, denomPos ); + + // use absolute values of numerator, denominator + + numerAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, numer ), numer, numerPos ); + denomAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, denom ), denom, denomPos ); + + // get difference of leading zeros to align denom with numer + + denomZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( denomAbs, 0 ), k23 ) ); + numerZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( numerAbs, 0 ), k23 ) ); + + shift = vec_sub( denomZeros, numerZeros ); + denomShifted = vec_sl( denomAbs, shift ); + oneShifted = vec_sl( one, shift ); + oneShifted = vec_sel( oneShifted, zero, vec_or( vec_cmpeq( denomAbs, zero ), + vec_cmpgt( denomAbs, numerAbs ) ) ); + + // long division + + quot = zero; + + do + { + cont = (vec_uint4)vec_cmpgt( oneShifted, zero ); + anyCont = vec_any_gt( oneShifted, zero ); + skip = (vec_uint4)vec_cmpgt( denomShifted, numerAbs ); + + newQuot = vec_or( quot, oneShifted ); + newNum = vec_sub( numerAbs, denomShifted ); + + oneShifted = vec_sr( oneShifted, one ); + denomShifted = vec_sr( denomShifted, one ); + + quot = vec_sel( newQuot, quot, skip ); + numerAbs = vec_sel( numerAbs, newNum, vec_andc( cont, skip ) ); + } + while ( anyCont ); + + res.quot = (vec_int4)vec_sel( quot, vec_sub( zero, quot ), quotNeg ); + res.rem = (vec_int4)vec_sel( 
(vec_uint4)vec_sub( (vec_int4)zero, (vec_int4)numerAbs ), numerAbs, numerPos ); + return res; +} + diff --git a/Extras/simdmathlibrary/ppu/exp2f4.c b/Extras/simdmathlibrary/ppu/exp2f4.c new file mode 100644 index 000000000..8fe74b55a --- /dev/null +++ b/Extras/simdmathlibrary/ppu/exp2f4.c @@ -0,0 +1,138 @@ +/* exp2f4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "common-types.h" + +/* + * FUNCTION + * vec_float4 _exp2_v(vec_float4 x) + * + * DESCRIPTION + * _exp2_v computes 2 raised to the input vector x. Computation is + * performed by observing the 2^(a+b) = 2^a * 2^b. + * We decompose x into a and b (above) by letting. + * a = ceil(x), b = x - a; + * + * 2^a is easilty computed by placing a into the exponent + * or a floating point number whose mantissa is all zeros. + * + * 2^b is computed using the following polynomial approximation. + * (C. Hastings, Jr, 1955). + * + * __7__ + * \ + * \ + * 2^(-x) = / Ci*x^i + * /____ + * i=1 + * + * for x in the range 0.0 to 1.0 + * + * C0 = 1.0 + * C1 = -0.9999999995 + * C2 = 0.4999999206 + * C3 = -0.1666653019 + * C4 = 0.0416573475 + * C5 = -0.0083013598 + * C6 = 0.0013298820 + * C7 = -0.0001413161 + * + * This function does not handle out of range conditions. It + * assumes that x is in the range (-128.0, 127.0]. Values outside + * this range will produce undefined results. + */ + + +#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */ + +vector float +exp2f4 (vector float x) +{ + vec_int4 ix; + vec_uint4 overflow; + vec_uint4 underflow; + vec_float4 frac, frac2, frac4; + vec_float4 exp_int, exp_frac; + vec_float4 result; + vec_float4 hi, lo; + vec_float4 zeros = vec_splatsf4(0.0f); + vec_float4 bias; + /* Break in the input x into two parts ceil(x), x - ceil(x). + */ +#if 1 + bias = (vec_float4)(vec_sra((vec_int4)x, vec_splatsu4(31) )); + bias = (vec_float4)(vec_andc(vec_splatsu4(0x3F7FFFFF), (vec_uint4)bias)); + ix = vec_cts(vec_add(x, bias), 0); +#else + bias = vec_sel(vec_floor(x), vec_ceil(x), vec_cmpgt(x, vec_splatsf4(0.0f))); + ix = vec_cts(bias, 0); +#endif + frac = vec_sub(vec_ctf(ix, 0), x); + frac = vec_madd(frac, vec_splatsf4(_EXP2F_H_LN2), zeros); + + // !!! 
HRD Changing weird un-understandable and incorrect overflow handling code + //overflow = vec_sel((vec_uint4)(0x7FFFFFFF), (vec_uint4)x, (vec_uint4)(0x80000000) ); + overflow = (vec_uint4)vec_cmpgt(x, (vec_float4)(vec_splatsi4(0x4300FFFF))); // !!! Biggest possible exponent to fit in range. + underflow = (vec_uint4)vec_cmpgt(vec_splatsf4(-126.0f), x); + + //exp_int = (vec_float4)(vec_sl(vec_add(ix, (vec_int4)(127)), (vec_uint4)(23))); // !!! HRD <- changing this to correct for + // !!! overflow (x >= 127.999999f) + exp_int = (vec_float4)(vec_sl(vec_add(ix, vec_splatsi4(126)), vec_splatsu4(23))); // !!! HRD <- add with saturation + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi amd lo). + */ + frac2 = vec_madd(frac, frac, zeros); + frac4 = vec_madd(frac2, frac2, zeros); + + hi = vec_madd(frac, vec_splatsf4(-0.0001413161), vec_splatsf4(0.0013298820)); + hi = vec_madd(frac, hi, vec_splatsf4(-0.0083013598)); + hi = vec_madd(frac, hi, vec_splatsf4(0.0416573475)); + lo = vec_madd(frac, vec_splatsf4(-0.1666653019), vec_splatsf4(0.4999999206)); + lo = vec_madd(frac, lo, vec_splatsf4(-0.9999999995)); + lo = vec_madd(frac, lo, vec_splatsf4(1.0)); + + exp_frac = vec_madd(frac4, hi, lo); + //ix = vec_add(ix, vec_sr((vec_int4)(exp_frac), (vec_uint4)(23) )); + result = vec_madd(exp_frac, exp_int, zeros); + result = vec_madd(exp_frac, exp_int, result); // !!! 
HRD + + /* Handle overflow */ + result = vec_sel(result, vec_splatsf4(HUGE_VALF), overflow); + result = vec_sel(result, zeros, underflow); + //result = vec_sel(result, (vec_float4)(overflow), vec_cmpgt((vec_uint4)(ix), (vec_uint4)(255))); + + return (result); +} diff --git a/Extras/simdmathlibrary/ppu/expf4.c b/Extras/simdmathlibrary/ppu/expf4.c new file mode 100644 index 000000000..00540fe8d --- /dev/null +++ b/Extras/simdmathlibrary/ppu/expf4.c @@ -0,0 +1,65 @@ +/* expf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +#define _EXPF_H_C1 ((float)-0.6931470632553101f) +#define _EXPF_H_C2 ((float)-1.1730463525082e-7f) + +#define _EXPF_H_INVLN2 ((float)1.4426950408889634f) + +vector float +expf4 (vector float x) +{ + vec_float4 zeros = vec_splatsf4(0.0f); + vec_uint4 xnegmask = (vec_uint4)vec_cmpgt(zeros, x); + vec_float4 goffset = vec_sel(vec_splatsf4( 0.5f),vec_splatsf4(-0.5f),xnegmask); + vec_float4 g = vec_madd(x, vec_splatsf4(_EXPF_H_INVLN2), zeros); + vec_int4 xexp = vec_cts(vec_add(g, goffset),0); + + g = vec_ctf(xexp, 0); + g = vec_madd(g, vec_splatsf4(_EXPF_H_C2), vec_madd(g, vec_splatsf4(_EXPF_H_C1), x)); + vec_float4 z = vec_madd(g, g, zeros); + vec_float4 a = vec_madd(z, vec_splatsf4(0.0999748594f), zeros); + vec_float4 b = vec_madd(g, + vec_madd(z, + vec_splatsf4(0.0083208258f), + vec_splatsf4(0.4999999992f) + ), + zeros); + + vec_float4 foo = divf4(vec_add(vec_splatsf4(1.0f), vec_add(a, b)), + vec_add(vec_splatsf4(1.0f), vec_sub(a, b))); + + return ldexpf4(foo, xexp); + +} diff --git a/Extras/simdmathlibrary/ppu/expm1f4.c b/Extras/simdmathlibrary/ppu/expm1f4.c new file mode 100644 index 000000000..d81942f00 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/expm1f4.c @@ -0,0 +1,57 @@ +/* expm1f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "common-types.h" + +#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f) +#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f) + +vector float +expm1f4 (vector float x) +{ + vec_float4 zeros = vec_splatsf4(0.0f); + vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(_EXPM1F_H_ln1by2)), + vec_cmpgt(vec_splatsf4(_EXPM1F_H_ln3by2), x)); + vec_float4 x2 = vec_madd(x,x,zeros); + vec_float4 d0, d1, n0, n1; + + d0 = vec_madd(x , vec_splatsf4(-0.3203561199f), vec_splatsf4(0.9483177697f)); + d1 = vec_madd(x2, vec_splatsf4( 0.0326527809f), d0); + + n0 = vec_madd(x , vec_splatsf4(0.1538026623f), vec_splatsf4(0.9483177732f)); + n1 = vec_madd(x , vec_splatsf4(0.0024490478f), vec_splatsf4(0.0305274668f)); + n1 = vec_madd(x2, n1, n0); + + return vec_sel(vec_sub(expf4(x), vec_splatsf4(1.0f)), + vec_madd(x, divf4(n1, d1), zeros), + nearzeromask); +} diff --git a/Extras/simdmathlibrary/ppu/fabsf4.c b/Extras/simdmathlibrary/ppu/fabsf4.c new file mode 100644 index 000000000..4c0aa46e7 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fabsf4.c @@ -0,0 +1,38 @@ +/* fabsf4 - for each of four float slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +vector float fabsf4(vector float x) +{ + return vec_abs( x ); +} + diff --git a/Extras/simdmathlibrary/ppu/fdimf4.c b/Extras/simdmathlibrary/ppu/fdimf4.c new file mode 100644 index 000000000..5d230a92b --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fdimf4.c @@ -0,0 +1,39 @@ +/* fdimf - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float +fdimf4 (vector float x, vector float y) +{ + vector float diff = vec_sub(x,y); + return vec_sel(((vector float){0.0f, 0.0f, 0.0f, 0.0f}), diff, vec_cmpgt(x,y)); +} diff --git a/Extras/simdmathlibrary/ppu/floorf4.c b/Extras/simdmathlibrary/ppu/floorf4.c new file mode 100644 index 000000000..d3d81663e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/floorf4.c @@ -0,0 +1,39 @@ +/* floorf4 - for each of four float slots, round down to largest integer not greater than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float +floorf4 (vector float x) +{ + return vec_floor( x ); +} + diff --git a/Extras/simdmathlibrary/ppu/fmaf4.c b/Extras/simdmathlibrary/ppu/fmaf4.c new file mode 100644 index 000000000..7bf2f62ea --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fmaf4.c @@ -0,0 +1,37 @@ +/* fmaf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +fmaf4 (vector float x, vector float y, vector float z) +{ + return vec_madd(x,y,z); +} diff --git a/Extras/simdmathlibrary/ppu/fmaxf4.c b/Extras/simdmathlibrary/ppu/fmaxf4.c new file mode 100644 index 000000000..a236182cd --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fmaxf4.c @@ -0,0 +1,40 @@ +/* fmaxf4 - for each of four float slots, compute maximum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + + +vector float +fmaxf4 (vector float x, vector float y) +{ + return vec_max( x, y ); +} + diff --git a/Extras/simdmathlibrary/ppu/fminf4.c b/Extras/simdmathlibrary/ppu/fminf4.c new file mode 100644 index 000000000..ac877827c --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fminf4.c @@ -0,0 +1,39 @@ +/* fminf4 - for each of four float slots, compute minimum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float +fminf4 (vector float x, vector float y) +{ + return vec_min( x, y ); +} + diff --git a/Extras/simdmathlibrary/ppu/fmodf4.c b/Extras/simdmathlibrary/ppu/fmodf4.c new file mode 100644 index 000000000..0210fcf88 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/fmodf4.c @@ -0,0 +1,82 @@ +/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +// +// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. +// If x == 0, the result is 0. +// If x != 0 and y == 0, the result is undefined. + +vector float +fmodf4 (vector float x, vector float y) +{ + vec_float4 q, xabs, yabs, qabs, xabs2; + vec_int4 qi0, qi1, qi2; + vec_float4 i0, i1, i2, r1, r2, i; + vec_uint4 inrange; + + // Find i = truncated_integer(|x/y|) + + // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0. + // Thus i is either the truncated quotient, one less, or one greater. 
+ + q = divf4( x, y ); + xabs = fabsf4( x ); + yabs = fabsf4( y ); + qabs = fabsf4( q ); + xabs2 = vec_add( xabs, xabs ); + + inrange = (vec_uint4)vec_cmpgt( (vec_float4)(vec_splatsu4(0x49800000)), qabs ); + + qi1 = vec_cts( qabs, 0 ); + qi0 = vec_add( qi1, ((vec_int4){-1, -1, -1, -1}) ); + qi2 = vec_add( qi1, ((vec_int4){1, 1, 1, 1}) ); + + i0 = vec_ctf( qi0, 0 ); + i1 = vec_ctf( qi1, 0 ); + i2 = vec_ctf( qi2, 0 ); + + // Correct i will be the largest one such that |x| - i*|y| >= 0. + + r1 = vec_nmsub( i1, yabs, xabs ); + r2 = vec_nmsub( i2, yabs, xabs ); + + i = i0; + i = vec_sel( i1, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r1 ) ); + i = vec_sel( i2, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r2 ) ); + + i = copysignf4( i, q ); + + return vec_sel( vec_splatsf4(0.0f), vec_nmsub( i, y, x ), inrange ); +} + diff --git a/Extras/simdmathlibrary/ppu/frexpf4.c b/Extras/simdmathlibrary/ppu/frexpf4.c new file mode 100644 index 000000000..61124a37c --- /dev/null +++ b/Extras/simdmathlibrary/ppu/frexpf4.c @@ -0,0 +1,57 @@ +/* frexpf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +vector float +frexpf4 (vector float x, vector signed int *exp) +{ + vec_int4 zeros = (vec_int4){0,0,0,0}; + vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = vec_splatsi4(0x7F800000); + vec_int4 e1 = vec_and ( (vec_int4)x, expmask); + vec_int4 e2 = vec_sub(vec_sr(e1, vec_splatsu4(23) ), vec_splatsi4(126) ); + *exp = vec_sel(e2, zeros, zeromask); + + vec_float4 m2 = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + + return vec_sel(m2, (vec_float4)zeros, zeromask); +} + + +/* +{ + *exp = ((vec_int4)(0)); + return ((vec_float4)(0.0f)); +} +*/ diff --git a/Extras/simdmathlibrary/ppu/hypotf4.c b/Extras/simdmathlibrary/ppu/hypotf4.c new file mode 100644 index 000000000..c7677a5b1 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/hypotf4.c @@ -0,0 +1,41 @@ +/* hypotf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "common-types.h" + +vector float +hypotf4 (vector float x, vector float y) +{ + vec_float4 sum = vec_madd(x,x, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + sum = vec_madd(y,y,sum); + return sqrtf4(sum); +} diff --git a/Extras/simdmathlibrary/ppu/ilogbf4.c b/Extras/simdmathlibrary/ppu/ilogbf4.c new file mode 100644 index 000000000..86c269aeb --- /dev/null +++ b/Extras/simdmathlibrary/ppu/ilogbf4.c @@ -0,0 +1,47 @@ +/* ilogbf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "common-types.h" + +vector signed int +ilogbf4 (vector float x) +{ + vec_int4 minus127 = vec_splatsi4(-127); + + vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000)); + vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0)); + vec_int4 e2 = vec_add(vec_sr(e1,vec_splatsu4(23)), minus127); + + return vec_sel(e2, vec_splatsi4(FP_ILOGB0), zeromask); + +} diff --git a/Extras/simdmathlibrary/ppu/ldexpf4.c b/Extras/simdmathlibrary/ppu/ldexpf4.c new file mode 100644 index 000000000..b542effca --- /dev/null +++ b/Extras/simdmathlibrary/ppu/ldexpf4.c @@ -0,0 +1,58 @@ +/* ldexpf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +vector float +ldexpf4 (vector float x, vector signed int exp) +{ + vec_int4 zeros = vec_splatsi4(0); + + vec_int4 expmask = vec_splatsi4(0x7F800000); + vec_int4 e1 = vec_and((vec_int4)x, expmask); + vec_int4 e2 = vec_sr(e1,vec_splatsu4(23)); + + vec_uint4 maxmask = (vec_uint4)vec_cmpgt(exp, vec_splatsi4(255)); + vec_uint4 minmask = (vec_uint4)vec_cmpgt(vec_splatsi4(-255), exp); + minmask = vec_or (minmask, (vec_uint4)vec_cmpeq(x, (vec_float4)zeros)); + + vec_int4 esum = vec_add(e2, exp); + + maxmask = vec_or (maxmask, (vec_uint4)vec_cmpgt(esum, vec_splatsi4(255))); + maxmask = vec_and(maxmask, vec_splatsu4(0x7FFFFFFF)); + minmask = vec_or (minmask, (vec_uint4)vec_cmpgt(zeros, esum)); + + x = vec_sel(x, (vec_float4)vec_sl(esum,vec_splatsu4(23)), (vec_uint4)expmask); + x = vec_sel(x, (vec_float4)zeros, minmask); + x = vec_sel(x, (vec_float4)maxmask, maxmask); + return x; +} diff --git a/Extras/simdmathlibrary/ppu/log10f4.c b/Extras/simdmathlibrary/ppu/log10f4.c new file mode 100644 index 000000000..32691b523 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/log10f4.c @@ -0,0 +1,82 @@ +/* log10f4 - + Copyright (C) 2006, 2007 Sony Computer 
Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + +#include "common-types.h" + +#define _LOG10F_H_loga2msb ((float) 0.3010299205780f) +#define _LOG10F_H_loga2lsb ((float) 7.5085978266e-8f) +#define _LOG10F_H_logaemsb ((float) 0.4342944622040f) +#define _LOG10F_H_logaelsb ((float) 1.9699272335e-8f) +#define _LOG10F_H_neglogae ((float)-0.4342944819033f) + +#define _LOG10F_H_c0 ((float)(-0.2988439998f)) +#define _LOG10F_H_c1 ((float)(-0.3997655209f)) +#define _LOG10F_H_c2 ((float)(-0.6666679125f)) + +vector float +log10f4 (vector float x) +{ + vec_int4 zeros = vec_splatsi4(0); + vec_float4 ones = vec_splatsf4(1.0f); + //vec_uchar16 zeromask = (vec_uchar16)vec_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = vec_splatsi4(0x7F800000); + vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); + x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + + vec_uint4 mask = (vec_uint4)vec_cmpgt( vec_splatsf4((float)0.7071067811865f), x); + x = vec_sel(x , vec_add(x, x) , mask); + xexp = vec_sel(xexp, vec_sub(xexp, vec_splatsi4(1)), mask); + + vec_float4 x1 = vec_sub(x , ones); + vec_float4 z = divf4 (x1, vec_add(x, ones)); + vec_float4 w = vec_madd(z , z, (vec_float4)zeros); + vec_float4 polywneg; + polywneg = vec_madd(vec_splatsf4(_LOG10F_H_c0), w, vec_splatsf4(_LOG10F_H_c1)); + polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOG10F_H_c2)); + + vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); + vec_float4 wnew = vec_ctf(xexp,0); + + vec_float4 zz1 = vec_madd(vec_splatsf4(_LOG10F_H_logaemsb), x1, + vec_madd(vec_splatsf4(_LOG10F_H_loga2msb),wnew,(vec_float4)zeros)); + vec_float4 zz2 = vec_madd(vec_splatsf4(_LOG10F_H_logaelsb), x1, + vec_madd(vec_splatsf4(_LOG10F_H_loga2lsb), wnew, + vec_madd(vec_splatsf4(_LOG10F_H_neglogae),y,(vec_float4)zeros)) + ); + + //return vec_sel(vec_add(zz1,zz2), (vec_float4)zeromask, zeromask); + return vec_add(zz1, zz2); +} + + diff --git 
a/Extras/simdmathlibrary/ppu/log1pf4.c b/Extras/simdmathlibrary/ppu/log1pf4.c new file mode 100644 index 000000000..3ac1971ea --- /dev/null +++ b/Extras/simdmathlibrary/ppu/log1pf4.c @@ -0,0 +1,54 @@ +/* log1pf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "common-types.h" + +vector float +log1pf4 (vector float x) +{ + vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(-0.5f)), + vec_cmpgt(vec_splatsf4(0.5f), x)); + vec_float4 x2 = vec_madd(x,x,vec_splatsf4(0.0f)); + vec_float4 d0, d1, n0, n1; + + d0 = vec_madd(x , vec_splatsf4(1.5934420741f), vec_splatsf4(0.8952856868f)); + d1 = vec_madd(x , vec_splatsf4(0.1198195734f), vec_splatsf4(0.8377145063f)); + d1 = vec_madd(x2, d1, d0); + + n0 = vec_madd(x , vec_splatsf4(1.1457993413f), vec_splatsf4(0.8952856678f)); + n1 = vec_madd(x , vec_splatsf4(0.0082862580f), vec_splatsf4(0.3394238808f)); + n1 = vec_madd(x2, n1, n0); + + return vec_sel(logf4(vec_add(x, vec_splatsf4(1.0f))), + vec_madd(x, divf4(n1, d1), vec_splatsf4(0.0f)), + nearzeromask); +} diff --git a/Extras/simdmathlibrary/ppu/log2f4.c b/Extras/simdmathlibrary/ppu/log2f4.c new file mode 100644 index 000000000..1d065ff4d --- /dev/null +++ b/Extras/simdmathlibrary/ppu/log2f4.c @@ -0,0 +1,80 @@ +/* log2f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + + +#define _LOG2F_H_l2emsb ((float) 1.4426950216293f) +#define _LOG2F_H_l2elsb ((float) 1.9259629911e-8f) +#define _LOG2F_H_negl2e ((float)-1.4426950408890f) + +#define _LOG2F_H_c0 ((float)(-0.2988439998f)) +#define _LOG2F_H_c1 ((float)(-0.3997655209f)) +#define _LOG2F_H_c2 ((float)(-0.6666679125f)) + +vector float +log2f4 (vector float x) +{ + vec_int4 zeros = vec_splatsi4(0); + vec_float4 ones = vec_splatsf4(1.0f); + //vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = vec_splatsi4(0x7F800000); + vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); + x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + + vec_uint4 mask = (vec_uint4)vec_cmpgt( vec_splatsf4((float)0.7071067811865f), x); + x = vec_sel(x , vec_add(x, x) , mask); + xexp = vec_sel(xexp, vec_sub(xexp, vec_splatsi4(1) ), mask); + + vec_float4 x1 = vec_sub(x , ones); + vec_float4 z = divf4(x1, vec_add(x, ones)); + vec_float4 w = vec_madd(z , z, (vec_float4)zeros); + vec_float4 polywneg; + polywneg = vec_madd(vec_splatsf4(_LOG2F_H_c0), w, vec_splatsf4(_LOG2F_H_c1)); + 
polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOG2F_H_c2)); + + vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); + vec_float4 zz1 = vec_madd(vec_splatsf4(_LOG2F_H_l2emsb), x1, vec_ctf(xexp,0)); + vec_float4 zz2 = vec_madd(vec_splatsf4(_LOG2F_H_l2elsb), x1, + vec_madd(vec_splatsf4(_LOG2F_H_negl2e), y, (vec_float4)zeros) + ); + + //return vec_sel(vec_add(zz1,zz2), (vec_float4)zeromask, zeromask); + return vec_add(zz1,zz2); +} + +/* +{ + return ((vec_float4)(0.0f)); +} +*/ diff --git a/Extras/simdmathlibrary/ppu/logbf4.c b/Extras/simdmathlibrary/ppu/logbf4.c new file mode 100644 index 000000000..fac06165e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/logbf4.c @@ -0,0 +1,44 @@ +/* logbf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "common-types.h" + +vector float +logbf4 (vector float x) +{ + vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000)); + vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0)); + e1 = vec_sub(e1, vec_splatsi4(0x3F800000)); + return vec_sel(vec_ctf(e1,23), vec_splatsf4(-HUGE_VALF), zeromask); +} + diff --git a/Extras/simdmathlibrary/ppu/logf4.c b/Extras/simdmathlibrary/ppu/logf4.c new file mode 100644 index 000000000..8b915c6a8 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/logf4.c @@ -0,0 +1,73 @@ +/* logf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + + +#define _LOGF_H_ln2msb ((float)(0.6931470632553f)) +#define _LOGF_H_negln2lsb ((float)(-1.1730463525e-7f)) + +#define _LOGF_H_c0 ((float)(-0.2988439998f)) +#define _LOGF_H_c1 ((float)(-0.3997655209f)) +#define _LOGF_H_c2 ((float)(-0.6666679125f)) + +vector float +logf4 (vector float x) +{ + vec_int4 zeros = vec_splatsi4(0); + vec_float4 ones = vec_splatsf4(1.0f); + //vec_uchar16 zeromask = (vec_uchar16)vec_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = vec_splatsi4(0x7F800000); + vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); + x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + + + vec_uint4 mask = (vec_uint4)vec_cmpgt(vec_splatsf4((float)0.7071067811865f), x); + x = vec_sel(x , vec_add(x, x) , mask); + xexp = vec_sel(xexp, vec_sub(xexp,vec_splatsi4(1)), mask); + + vec_float4 x1 = vec_sub(x , ones); + vec_float4 z = divf4 (x1, vec_add(x, ones)); + vec_float4 w = vec_madd(z , z, (vec_float4)zeros); + vec_float4 polywneg; + polywneg = vec_madd(vec_splatsf4(_LOGF_H_c0), w, vec_splatsf4(_LOGF_H_c1)); + polywneg = vec_madd(polywneg , w, 
vec_splatsf4(_LOGF_H_c2)); + + vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); + vec_float4 wnew = vec_ctf(xexp,0); + vec_float4 zz1 = vec_madd(vec_splatsf4(_LOGF_H_ln2msb) , wnew, x1); + vec_float4 zz2neg = vec_madd(vec_splatsf4(_LOGF_H_negln2lsb), wnew, y ); + + //return vec_sel(vec_sub(zz1,zz2neg), (vec_float4)zeromask, zeromask); + return vec_sub(zz1,zz2neg); +} diff --git a/Extras/simdmathlibrary/ppu/modff4.c b/Extras/simdmathlibrary/ppu/modff4.c new file mode 100644 index 000000000..54ff1ba72 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/modff4.c @@ -0,0 +1,47 @@ +/* modff4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// modff4 - for each of four float slots, compute fractional and integral parts. +// Returns fractional part and stores integral part in *iptr. + +vector float +modff4 (vector float x, vector float *iptr) +{ + vector float integral, fraction; + + integral = truncf4( x ); + fraction = vec_sub( x, integral ); + + *iptr = integral; + return fraction; +} + diff --git a/Extras/simdmathlibrary/ppu/negatef4.c b/Extras/simdmathlibrary/ppu/negatef4.c new file mode 100644 index 000000000..74447c90e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/negatef4.c @@ -0,0 +1,40 @@ +/* negatef4 - for each of four float slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +vector float +negatef4 (vector float x) +{ + return (vector float)vec_xor( (vector unsigned int)x, vec_splatsu4(0x80000000) ); +} + diff --git a/Extras/simdmathlibrary/ppu/negatei4.c b/Extras/simdmathlibrary/ppu/negatei4.c new file mode 100644 index 000000000..7d538053b --- /dev/null +++ b/Extras/simdmathlibrary/ppu/negatei4.c @@ -0,0 +1,39 @@ +/* negatei4 - for each of four int slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector signed int +negatei4 (vector signed int x) +{ + vector signed int zero = (vector signed int){0, 0, 0, 0}; + return vec_sub (zero, x); +} + diff --git a/Extras/simdmathlibrary/ppu/powf4.c b/Extras/simdmathlibrary/ppu/powf4.c new file mode 100644 index 000000000..f290219cf --- /dev/null +++ b/Extras/simdmathlibrary/ppu/powf4.c @@ -0,0 +1,74 @@ +/* exp2f4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "common-types.h" + +vector float +powf4 (vector float x, vector float y) +{ + vec_int4 zeros = (vec_int4){0,0,0,0}; + vec_uint4 zeromask = (vec_uint4)vec_cmpeq((vec_float4)zeros, x); + + vec_uint4 negmask = (vec_uint4)vec_cmpgt((vec_float4)zeros, x); + + vec_float4 sbit = (vec_float4)(vec_splatsi4(0x80000000)); + vec_float4 absx = vec_andc(x, sbit); + vec_float4 absy = vec_andc(y, sbit); + vec_uint4 oddy = vec_and(vec_ctu(absy, 0), vec_splatsu4(0x00000001)); + negmask = vec_and(negmask, (vec_uint4)vec_cmpgt(oddy, (vec_uint4)zeros)); + + vec_float4 res = exp2f4(vec_madd(y, log2f4(absx), (vec_float4)zeros)); + res = vec_sel(res, vec_or(sbit, res), negmask); + + + return vec_sel(res, (vec_float4)zeros, zeromask); +} + +/* +{ + vec_int4 zeros = vec_splats(0); + vec_int4 ones = (vec_int4)vec_splats((char)0xFF); + vec_uint4 zeromask = (vec_uint4)vec_cmpeq((vec_float4)zeros, x); + vec_uint4 onemask = (vec_uint4)vec_cmpeq((vec_float4)ones , y); + 
vec_uint4 negmask = (vec_uint4)vec_cmpgt(vec_splats(0.0f), x); + vec_float4 sbit = (vec_float4)(vec_int4)(0x80000000); + vec_float4 absx = vec_andc(x, sbit); + vec_float4 absy = vec_andc(y, sbit); + vec_uint4 oddy = vec_and(vec_convtu(absy, 0), (vec_uint4)vec_splats(0x00000001)); + negmask = vec_and(negmask, (vec_uint4)vec_cmpgt(oddy, (vec_uint4)zeros)); + + + +} + +*/ diff --git a/Extras/simdmathlibrary/ppu/recipf4.c b/Extras/simdmathlibrary/ppu/recipf4.c new file mode 100644 index 000000000..38f0f1550 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/recipf4.c @@ -0,0 +1,46 @@ +/* recipf4 - for each of four float slots, compute reciprocal. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +vector float +recipf4 (vector float x) +{ + // Reciprocal estimate and 1 Newton-Raphson iteration. + + vec_float4 y0; + vec_float4 ones = ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f}); + + y0 = vec_re( x ); + return vec_madd( vec_nmsub( x, y0, ones), y0, y0 ); +} + diff --git a/Extras/simdmathlibrary/ppu/rsqrtf4.c b/Extras/simdmathlibrary/ppu/rsqrtf4.c new file mode 100644 index 000000000..a78e52a1a --- /dev/null +++ b/Extras/simdmathlibrary/ppu/rsqrtf4.c @@ -0,0 +1,53 @@ +/* sqrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "common-types.h" + +// rsqrtf4 - for each of four float slots, compute reciprocal square root. +// Undefined if input < 0. + +vector float +rsqrtf4 (vector float x) +{ + // Reciprocal square root estimate and 1 Newton-Raphson iteration. + + vector float zero = vec_splatsf4(0.0f); + vector float half = vec_splatsf4(0.5f); + vector float one = vec_splatsf4(1.0f); + vector float y0, y0x, y0half; + + y0 = vec_rsqrte( x ); + y0x = vec_madd( y0, x, zero ); + y0half = vec_madd( y0, half, zero ); + return vec_madd( vec_nmsub( y0, y0x, one ), y0half, y0 ); +} + diff --git a/Extras/simdmathlibrary/ppu/sincos_c.h b/Extras/simdmathlibrary/ppu/sincos_c.h new file mode 100644 index 000000000..ab19b0eb0 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/sincos_c.h @@ -0,0 +1,96 @@ +/* Common constants for Sin/Cos/Tan + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __SINCOS_C2__ +#define __SINCOS_C2__ + +// +// Common constants used to evaluate sind2/cosd2/tand2 +// +#define _SINCOS_CC0D 0.00000000206374484196 +#define _SINCOS_CC1D -0.00000027555365134677 +#define _SINCOS_CC2D 0.00002480157946764225 +#define _SINCOS_CC3D -0.00138888888730525966 +#define _SINCOS_CC4D 0.04166666666651986722 +#define _SINCOS_CC5D -0.49999999999999547304 + +#define _SINCOS_SC0D 0.00000000015893606014 +#define _SINCOS_SC1D -0.00000002505069049138 +#define _SINCOS_SC2D 0.00000275573131527032 +#define _SINCOS_SC3D -0.00019841269827816117 +#define _SINCOS_SC4D 0.00833333333331908278 +#define _SINCOS_SC5D -0.16666666666666612594 + +#define _SINCOS_KC1D (13176794.0 / 8388608.0) +#define _SINCOS_KC2D 7.5497899548918821691639751442098584e-8 + + +// +// Common constants used to evaluate sinf4/cosf4/tanf4 +// +#define _SINCOS_CC0 -0.0013602249f +#define _SINCOS_CC1 0.0416566950f +#define _SINCOS_CC2 -0.4999990225f +#define _SINCOS_SC0 -0.0001950727f +#define _SINCOS_SC1 0.0083320758f +#define _SINCOS_SC2 -0.1666665247f + +#define _SINCOS_KC1 1.57079625129f +#define _SINCOS_KC2 7.54978995489e-8f + +// +// Common constants used to evaluate sinf4est/cosf4est +// +#define _SINCOS_R1 -0.1666665668f +#define _SINCOS_R2 0.8333025139e-2f +#define _SINCOS_R3 -0.1980741872e-3f +#define _SINCOS_R4 0.2601903036e-5f + +#define _SINCOS_C1 (201.0f/64.0f) +#define _SINCOS_C2 9.67653589793e-4f + + +// common constants used to evaluate sinf/cosf + +#define _SIN_C1 -0.35950439e-4f +#define _SIN_C2 0.2490001007e-2f +#define _SIN_C3 -0.8074543253e-1f +#define _SIN_C4 0.7853981633f + +#define _COS_C1 -0.31872783e-3f +#define _COS_C2 0.1584968416e-1f +#define _COS_C3 -0.30842416558f +#define _COS_C4 0.9999999673f + +#define POW2(x) x*x +#define SPOLY(x) (((_SIN_C1 * POW2(x) + _SIN_C2) * POW2(x) + (_SIN_C3)) * POW2(x) + _SIN_C4) * x +#define CPOLY(x) (((_COS_C1 * POW2(x) + _COS_C2) * POW2(x) + (_COS_C3)) * POW2(x) + _COS_C4) + +#define M_PI 3.141592653589793f 
+#endif diff --git a/Extras/simdmathlibrary/ppu/sincosf4.c b/Extras/simdmathlibrary/ppu/sincosf4.c new file mode 100644 index 000000000..19efeed30 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/sincosf4.c @@ -0,0 +1,106 @@ +/* sincosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "sincos_c.h" +#include "common-types.h" + +// +// Computes both the sine and cosine of the all four slots of x +// by using a polynomial approximation. 
+// +void sincosf4 (vector float x, vector float *s, vector float *c) +{ + vec_float4 xl,xl2,xl3; + vec_int4 q; + vec_int4 offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + // + offsetSin = vec_and(q,vec_splatsi4((int)0x3)); + offsetCos = vec_add(vec_splatsi4(1),offsetSin); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); + xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); + vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); + + vec_float4 ct2 = vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); + vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); + + vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); + vec_float4 sx = vec_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,vec_splatsi4(0x1)),vec_splatsi4(0)); + vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,vec_splatsi4(0x1)),vec_splatsi4(0)); + *s = vec_sel(cx,sx,sinMask); + *c = vec_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 
4) = 1 or 2 + // + sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,vec_splatsi4(0x2)),vec_splatsi4(0)); + cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,vec_splatsi4(0x2)),vec_splatsi4(0)); + + *s = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000),(vec_uint4)*s),*s,sinMask); + *c = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000),(vec_uint4)*c),*c,cosMask); + +} + diff --git a/Extras/simdmathlibrary/ppu/sinf4.c b/Extras/simdmathlibrary/ppu/sinf4.c new file mode 100644 index 000000000..c382e85bd --- /dev/null +++ b/Extras/simdmathlibrary/ppu/sinf4.c @@ -0,0 +1,103 @@ +/* sinf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "sincos_c.h" +#include "common-types.h" +// +// Computes the sine of each of the four slots +// by using a polynomial approximation. +// + +vector float +sinf4 (vector float x) +{ + vec_float4 xl,xl2,xl3,res; + vec_int4 q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = vec_and(q,vec_splatsi4((int)0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); + xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); + vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); + + vec_float4 ct2 = vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); + 
vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); + + vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); + vec_float4 sx = vec_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 mask1 = (vec_uint4)vec_cmpeq(vec_and(offset, + vec_splatsi4(0x1)), + vec_splatsi4((int)(0))); + res = vec_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_uint4 mask2 = (vec_uint4)vec_cmpeq(vec_and(offset,vec_splatsi4(0x2)),vec_splatsi4((int)0)); + res = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000U),(vec_uint4)res),res,mask2); + + return res; + +} + diff --git a/Extras/simdmathlibrary/ppu/sqrtf4.c b/Extras/simdmathlibrary/ppu/sqrtf4.c new file mode 100644 index 000000000..e40b4c841 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/sqrtf4.c @@ -0,0 +1,53 @@ +/* sqrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// sqrtf4 - for each of four float slots, compute square root. +// Undefined if input < 0. + +vector float +sqrtf4 (vector float x) +{ + // Reciprocal square root estimate and 1 Newton-Raphson iteration. + + vector float zero = (vector float){0.0f, 0.0f, 0.0f, 0.0f}; + vector float half = (vector float){0.5f, 0.5f, 0.5f, 0.5f}; + vector float one = (vector float){1.0f, 1.0f, 1.0f, 1.0f}; + vector float y0, y0x, y0xhalf; + vector unsigned int cmp_zero; + + y0 = vec_rsqrte( x ); + cmp_zero = (vector unsigned int)vec_cmpeq( x, zero ); + y0x = vec_madd( y0, x, zero ); + y0xhalf = vec_madd( y0x, half, zero ); + return vec_sel( vec_madd( vec_nmsub( y0, y0x, one ), y0xhalf, y0x ), zero, cmp_zero ); +} + diff --git a/Extras/simdmathlibrary/ppu/tanf4.c b/Extras/simdmathlibrary/ppu/tanf4.c new file mode 100644 index 000000000..8f3f6de09 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tanf4.c @@ -0,0 +1,96 @@ +/* tanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sincos_c.h" + + +#include "common-types.h" + +#define _TAN_KC1 1.57079625129f +#define _TAN_KC2 7.54978995489e-8f + +// +// Computes the tangent of all four slots of x by using a polynomia approximation. 
+// +vector float +tanf4 (vector float x) +{ + vector float xl,xl2,xl3,res; + vector signed int q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(x))*sign(x)) + // + xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Remainder in range [-pi/4..pi/4] + // + vector float qf = vec_ctf(q,0); + vector float p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); + xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + x2 * (C0 * x2 + C1), and + // sx = xl + x3 * S0 + // + vector float ct2 = vec_madd(vec_splatsf4( 0.0097099364f),xl2,vec_splatsf4(-0.4291161787f)); + + vector float cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); + vector float sx = vec_madd(vec_splatsf4(-0.0957822992f),xl3,xl); + + + // Compute both cx/sx and sx/cx + // + vector float cxosx = divf4(cx,sx); + vector float sxocx = divf4(sx,cx); + + vector float ncxosx = (vector float)vec_xor(vec_splatsu4(0x80000000),(vector unsigned int)cxosx); + + // For odd numbered quadrants return -cx/sx , otherwise return + // sx/cx + // + vector unsigned int mask = (vector unsigned int)vec_cmpeq(vec_and(q,vec_splatsi4(0x1)),vec_splatsi4(0)); + res = vec_sel(ncxosx,sxocx,mask); + + return res; +} diff --git a/Extras/simdmathlibrary/ppu/tests/Makefile b/Extras/simdmathlibrary/ppu/tests/Makefile new file mode 100644 index 000000000..abdd85df3 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/Makefile @@ -0,0 +1,131 @@ +# Makefile for testsuite for the PPU SIMD math library +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +TESTS = fabsf4 absi4 truncf4 sqrtf4 negatef4 \ + copysignf4 modff4 fminf4_fmaxf4 \ + floorf4 recipf4 ceilf4 divf4 divi4 \ + rsqrtf4 fmodf4 negatei4 + +STATIC_TESTS = $(TESTS) +SHARED_TESTS = $(TESTS:=.shared) +ALL_TESTS = $(STATIC_TESTS) $(SHARED_TESTS) + +INCLUDES_PPU = -I../../ + +ARCH_PPU = 64 +CROSS_PPU = ppu- +AR_PPU = $(CROSS_PPU)ar +CC_PPU = $(CROSS_PPU)gcc +CXX_PPU = $(CROSS_PPU)g++ +TEST_CMD_PPU = + +ARCH_CFLAGS_PPU = -m$(ARCH_PPU) -maltivec -mabi=altivec +CFLAGS_PPU = $(INCLUDES_PPU) -O2 -W -Wall $(ARCH_CFLAGS_PPU) +STATIC_LDFLAGS_PPU = -static +SHARED_LDFLAGS_PPU = -Wl,-rpath=.. +LDFLAGS_PPU = $(ARCH_CFLAGS_PPU) -L../ -l$(LIB_BASE) -lm + +MAKE_DEFS = \ + LIB_BASE='$(LIB_BASE)' \ + LIB_NAME='$(LIB_NAME)' \ + STATIC_LIB='$(STATIC_LIB)' \ + SHARED_LIB='$(SHARED_LIB)' \ + ARCH_PPU='$(ARCH_PPU)' \ + ARCH_CFLAGS_PPU='$(ARCH_CFLAGS_PPU)' \ + CROSS_PPU='$(CROSS_PPU)' \ + AR_PPU='$(AR_PPU)' \ + CC_PPU='$(CC_PPU)' \ + CXX_PPU='$(CXX_PPU)' \ + TEST_CMD_PPU='$(TEST_CMD_PPU)' + +LIB_BASE = simdmath +LIB_NAME = lib$(LIB_BASE) +STATIC_LIB = $(LIB_NAME).a +SHARED_LIB = $(LIB_NAME).so + +TEST_CMD = $(TEST_CMD_PPU) + +COMMON_OBJS = testutils.o + + +all: $(ALL_TESTS) + + +$(STATIC_TESTS): %: %.o ../$(STATIC_LIB) $(COMMON_OBJS) + $(CC_PPU) $*.o $(COMMON_OBJS) $(LDFLAGS_PPU) $(STATIC_LDFLAGS_PPU) -o $@ + +$(SHARED_TESTS): %.shared: %.o ../$(SHARED_LIB) $(COMMON_OBJS) + $(CC_PPU) $*.o $(COMMON_OBJS) $(LDFLAGS_PPU) $(SHARED_LDFLAGS_PPU) -o $@ + +clean: + rm -f *.o + rm -f $(STATIC_TESTS) $(SHARED_TESTS) + rm -f core* + +check: $(ALL_TESTS) + for test in $(ALL_TESTS); do \ + echo "TEST $${test}"; \ + if $(TEST_CMD) ./$${test}; then \ + pass="$$pass $$test"; \ + else \ + fail="$$fail $$test"; \ + fi \ + done; \ + echo; echo "PASS:$$pass"; echo "FAIL:$$fail"; \ + test -z "$$fail" + +static_check: + $(MAKE) $(MAKE_DEFS) ALL_TESTS="$(STATIC_TESTS)" check + +shared_check: + $(MAKE) $(MAKE_DEFS) ALL_TESTS="$(SHARED_TESTS)" check + +../$(STATIC_LIB): + cd ../;$(MAKE) $(MAKE_DEFS) 
$(STATIC_LIB) + +../$(SHARED_LIB): + cd ../;$(MAKE) $(MAKE_DEFS) $(SHARED_LIB) + +%.o: %.c common-test.h testutils.h + $(CC_PPU) $(CFLAGS_PPU) -c $< + +#---------- +# C++ +#---------- +%.o: %.C + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cpp + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cc + $(CXX_PPU) $(CFLAGS_PPU) -c $< + +%.o: %.cxx + $(CXX_PPU) $(CFLAGS_PPU) -c $< + diff --git a/Extras/simdmathlibrary/ppu/tests/absi4.c b/Extras/simdmathlibrary/ppu/tests/absi4.c new file mode 100644 index 000000000..926e8d932 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/absi4.c @@ -0,0 +1,73 @@ +/* Testcase for absi4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" +int main() +{ + TEST_SET_START("20040908101807EJL","EJL", "abs"); + + int x0n = hide_int(0); + int x0p = hide_int(0); + int x1n = hide_int(-1); + int x1p = hide_int(1); + int x2n = hide_int(-83532); + int x2p = hide_int(83532); + + vec_int4 x0n_v = vec_splat_int(x0n); + vec_int4 x0p_v = vec_splat_int(x0p); + vec_int4 x1n_v = vec_splat_int(x1n); + vec_int4 x1p_v = vec_splat_int(x1p); + vec_int4 x2n_v = vec_splat_int(x2n); + vec_int4 x2p_v = vec_splat_int(x2p); + + vec_int4 res_v; + + TEST_START("absi4"); + res_v = absi4(x0n_v); + TEST_CHECK("20040908103824EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = absi4(x0p_v); + TEST_CHECK("20040908103903EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = absi4(x1n_v); + TEST_CHECK("20040908103905EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = absi4(x1p_v); + TEST_CHECK("20040908114003EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = absi4(x2n_v); + TEST_CHECK("20040908114714EJL", allequal_int4( res_v, x2p_v ), 0); + res_v = absi4(x2p_v); + TEST_CHECK("20040908114715EJL", allequal_int4( res_v, x2p_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/ceilf4.c b/Extras/simdmathlibrary/ppu/tests/ceilf4.c new file mode 100644 index 000000000..e06c0f716 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/ceilf4.c @@ -0,0 +1,92 @@ +/* Testcase for ceilf4 + 
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040916103300EJL","EJL", "ceilf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4b000000; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. 
+ + float x0 = hide_float(0.91825f); + float x0i = hide_float(1.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = vec_splat_float(x0); + vec_float4 x0i_v = vec_splat_float(x0i); + vec_float4 x1_v = vec_splat_float(x1); + vec_float4 x1i_v = vec_splat_float(x1i); + vec_float4 x2_v = vec_splat_float(x2); + vec_float4 x2i_v = vec_splat_float(x2i); + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3i_v = vec_splat_float(x3i); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4i_v = vec_splat_float(x4i); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5i_v = vec_splat_float(x5i); + + vec_float4 res_v; + + TEST_START("ceilf4"); + res_v = ceilf4(x0_v); + TEST_CHECK("20040916103310EJL", allequal_float4( res_v, x0i_v ), 0); + res_v = ceilf4(x1_v); + TEST_CHECK("20040916103324EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = ceilf4(x2_v); + TEST_CHECK("20040916103334EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = ceilf4(x3_v); + TEST_CHECK("20040916103341EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = ceilf4(x4_v); + TEST_CHECK("20040916103350EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = ceilf4(x5_v); + TEST_CHECK("20040916103357EJL", allequal_float4( res_v, x5i_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/common-test.h b/Extras/simdmathlibrary/ppu/tests/common-test.h new file mode 100644 index 000000000..c0da56001 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/common-test.h @@ -0,0 +1,198 @@ +/* Header file for common parts of the testsuite + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include + +static inline unsigned long long clock() +{ + unsigned long long ret; + /* This need to be fixed for the hardware errata. 
*/ + __asm __volatile__ ( "mftb %0\n" + : "=r" (ret) + : + : "memory"); + return (ret); +} +// Test files begin with TEST_SET_START("your initials","test set description") +// Individual tests begin with TEST_START("name of test") +// and end with TEST_PASS(), TEST_FAIL("reason for failure") or TEST_CHECK() +// Or you can run a test encapsulated in a function with: +// TEST_FUNCTION("name of test", function(), "reason for failure") +// +// The clock starts when you call TEST_START and stops with TEST_PASS, TEST_FAIL or TEST_CHECK +// After a start there can be several PASS, FAIL or CHECK calls, each one counts as a test, time is measured from the prior call +// +char + *__initials, // Test owner's initials + *__description, // short descriptive name for this test set + *__name, // name of the currently running test + *__set_id; // id of the the test set +int +// __zip=0, + __success=1, // set to 0 if any tests failed + __count, // Total number of tests run + __passed; // Total number of tests passed +unsigned long long + __ttemp, + __time, // For timing tests (usually start time of last test) + __ttime; // Cumulative test runtime NOT counting runtime of the TEST macros + +// TEST_SET_START +// Call at the start of a set of related tests to identify them +// Prints a "start of set banner message" +// set_id - unique test set identifyer a time in the format yyyymmddhhmmss followed by your initials ie: 20040716104615GAC +// initials - your initials +// description - brief descriptive name for this test set +#define TEST_SET_START(set_id,initials,description) \ + do { \ + __set_id=set_id; \ + __initials=initials; \ + __description=description; \ + __count=0; \ + __passed=0; \ + __time=0; \ + __ttime=0; \ + printf("0\t%s\t%d\t%s\tSTART\tpassed\ttotal\ttime\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,__set_id, __description); \ + } while(0) + +// TEST_START +// Begins a test, and starts the clock +// name - brief name for this test +#define TEST_START(name) 
\ + do { \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __name=name; \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + } while(0) + +// TEST_PASS +// Indicates the test passed +// test_id - unique test ID number, same format as the set_id number +// This should match the id provided to the matching TEST_FAIL call +#define TEST_PASS(test_id) \ + do { \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + } while(0) + +// TEST_FAIL +// Indicates the test failed +// test_id - unique test ID number, same format as the set_id number +// This should match the id provided to the matching TEST_PASS call +// why - brief description of why it failed +#define TEST_FAIL(test_id,why,error_code) \ + do { \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,error_code); \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + } while(0) + +// TEST_CHECK +// Passes or fails the test after evaluating the "test" argument (just like assert but without terminating the program) +// The clock is immediately stopped so the time required to evaluate "test" will NOT be included in the reported time +// If the test failed, the reason will be printed as FAILED BECAUSE: check (value of "test") failed +// test_id - unique test ID number, same format as the set_id number +// test - expression evaluating to true/false +#define TEST_CHECK(test_id,test,error_code) \ + do { \ + 
__asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + if(test) \ + { \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + } \ + else \ + { \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: check %s failed\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,#test,error_code); \ + } \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + } while(0) + +// TEST_FUNCTION +// Runs a test encapsulated in a function that returns 0 if the test passed and an error number if it failed +// The clock is started on calling the function and stopped as soon as it returns so the branching logic will not be included in the time +// test_id - unique test ID number, same format as the set_id number +// name - brief name for the test +// func - function invocation (should include parenthesis, may have arguments) +// why - brief description to print if the test fails +#define TEST_FUNCTION(test_id,name,func,why) \ + do { \ + TEST_START(name); \ + int result=func; \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + if(result==0) \ + { \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + } \ + else \ + { \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,result); \ + } \ + __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + } while(0) + +// TEST_SET_DONE +// Ends a set of tests, prints out the closing banner (OK if all tests pass, PROBLEM if any fail) +// Also prints count of tests passed, 
tests run and total time +#define TEST_SET_DONE() \ + do { \ + printf("9\t%s\t%d\t%s\t%s\t%d\t%d\t%lld\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,(__count==__passed)?"OK":"PROBLEM",__passed,__count,__ttime,__set_id,__description); \ + } while(0) + +// TEST_EXIT +// Call this ONCE at the very end of the test program, it calls "exit" to return +// EXIT_SUCCESS if all tests passed or EXIT_FAILURE if any tests failed. +// This allows the makefile/shell script running the tests to know which ones failed +#define TEST_EXIT() \ + do { \ + printf("FINISHED!\n"); \ + if(__success) \ + exit(0); \ + else \ + exit(-1); \ + } while (0) diff --git a/Extras/simdmathlibrary/ppu/tests/copysignf4.c b/Extras/simdmathlibrary/ppu/tests/copysignf4.c new file mode 100644 index 000000000..db7adcb83 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/copysignf4.c @@ -0,0 +1,67 @@ +/* Testcase for copysignf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + +int main() +{ + TEST_SET_START("20040917114054EJL", "EJL", "copysignf"); + + float x0m = hide_float(1989.0f); + float x0s = hide_float(-319875.0f); + float x0c = hide_float(-1989.0f); + float x1m = hide_float(9013.0f); + float x1s = hide_float(185.0f); + float x1c = hide_float(9013.0f); + + vec_float4 x0m_v = vec_splat_float(x0m); + vec_float4 x0s_v = vec_splat_float(x0s); + vec_float4 x0c_v = vec_splat_float(x0c); + + vec_float4 x1m_v = vec_splat_float(x1m); + vec_float4 x1s_v = vec_splat_float(x1s); + vec_float4 x1c_v = vec_splat_float(x1c); + + vec_float4 res_v; + + TEST_START("copysignf4"); + res_v = copysignf4( x0m_v, x0s_v ); + TEST_CHECK("20040917114058EJL", allequal_float4( res_v, x0c_v ), 0); + res_v = copysignf4( x1m_v, x1s_v ); + TEST_CHECK("20040917114100EJL", allequal_float4( res_v, x1c_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/divf4.c b/Extras/simdmathlibrary/ppu/tests/divf4.c new file mode 100644 index 000000000..8816b7ed7 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/divf4.c @@ -0,0 +1,128 @@ +/* Testcase for divf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040928105926EJL","EJL", "divf4"); + + unsigned int i0n = 0x75013340; + unsigned int i0d = 0x75e7753f; + unsigned int i0r = 0x3e8ee64b; + unsigned int i1n = 0x4c7fed5a; + unsigned int i1d = 0x3a0731f0; + unsigned int i1r = 0x51f24e86; + unsigned int i2n = 0x5b08b303; + unsigned int i2d = 0x562f5046; + unsigned int i2r = 0x44479d24; + unsigned int i3n = 0x748a9b87; + unsigned int i3d = 0x6b014b46; + unsigned int i3r = 0x49093864; + unsigned int i4n = 0x35dcf9d8; + unsigned int i4d = 0x6278d6e0; + unsigned int i4r = 0x12e355b5; + unsigned int i5n = 0x74d505fd; + unsigned int i5d = 0x61ef565e; + unsigned int i5r = 0x5263daa3; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); + float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = vec_splat_float(x0n); + vec_float4 x0d_v = vec_splat_float(x0d); + vec_float4 x0r_v = vec_splat_float(x0r); + + vec_float4 x1n_v = vec_splat_float(x1n); + vec_float4 x1d_v = vec_splat_float(x1d); + vec_float4 x1r_v = vec_splat_float(x1r); + + vec_float4 x2n_v = vec_splat_float(x2n); + vec_float4 x2d_v = vec_splat_float(x2d); + vec_float4 x2r_v = vec_splat_float(x2r); + + vec_float4 x3n_v = 
vec_splat_float(x3n); + vec_float4 x3d_v = vec_splat_float(x3d); + vec_float4 x3r_v = vec_splat_float(x3r); + + vec_float4 x4n_v = vec_splat_float(x4n); + vec_float4 x4d_v = vec_splat_float(x4d); + vec_float4 x4r_v = vec_splat_float(x4r); + + vec_float4 x5n_v = vec_splat_float(x5n); + vec_float4 x5d_v = vec_splat_float(x5d); + vec_float4 x5r_v = vec_splat_float(x5r); + + vec_float4 res_v; + + TEST_START("divf4"); + res_v = divf4(x0n_v, x0d_v); + TEST_CHECK("20040928105932EJL", allequal_ulps_float4( res_v, x0r_v, 2 ), 0); + res_v = divf4(x1n_v, x1d_v); + TEST_CHECK("20040928105934EJL", allequal_ulps_float4( res_v, x1r_v, 2 ), 0); + res_v = divf4(x2n_v, x2d_v); + TEST_CHECK("20040928105936EJL", allequal_ulps_float4( res_v, x2r_v, 2 ), 0); + res_v = divf4(x3n_v, x3d_v); + TEST_CHECK("20040928105938EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = divf4(x4n_v, x4d_v); + TEST_CHECK("20040928105940EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = divf4(x5n_v, x5d_v); + TEST_CHECK("20040928105943EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/divi4.c b/Extras/simdmathlibrary/ppu/tests/divi4.c new file mode 100644 index 000000000..701c1ce62 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/divi4.c @@ -0,0 +1,124 @@ +/* Testcase for divi4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + + +int main() +{ + TEST_SET_START("20040928161739EJL","EJL", "divi4"); + + int x0n = 0xffccb78d; + int x0d = 0x0 ; + int x0q = 0x0 ; + int x0r = 0xffccb78d; + int x1n = 0x0; + int x1d = 0xff976bb6; + int x1q = 0x0 ; + int x1r = 0x0; + int x2n = 0x0; + int x2d = 0x0; + int x2q = 0x0 ; + int x2r = 0x0; + int x3n = 0xf0e91618; + int x3d = 0xfddff7ac; + int x3q = 0x7 ; + int x3r = 0xffc95064; + + int x4n = 0xf2128d9d; + int x4d = 0xe0f76 ; + int x4q = 0xffffff03; + int x4r = 0xfff7d53b; + int x5n = 0xda1ba2ce; + int x5d = 0x4c9 ; + int x5q = 0xfff814d3; + int x5r = 0xfffffd23; + int x6n = 0xdd4426a6; + int x6d = 0xf8d245cf; + int x6q = 0x4 ; + int x6r = 0xf9fb0f6a; + int x7n = 0xd1d5ae9 ; + int x7d = 0x333ab105; + int x7q = 0x0 ; + int x7r = 0xd1d5ae9 ; + + int x8n = 0x3e0c6 ; + int x8d = 0xfff24255; + int x8q = 0x0 ; + int x8r = 0x3e0c6 ; + int x9n = 0xfd6fe27e; + int x9d = 0xf32454 ; 
+ int x9q = 0xfffffffe; + int x9r = 0xff562b26; + int x10n =0xfb150f79; + int x10d =0xf521 ; + int x10q =0xfffffade; + int x10r =0xffff42db; + int x11n =0xfe88071f; + int x11d =0xfff937c2; + int x11q =0x37 ; + int x11r =0xfffd0c71; + + + vec_int4 x0n_v = (vec_int4){ x0n, x1n, x2n, x3n }; + vec_int4 x1n_v = (vec_int4){ x4n, x5n, x6n, x7n }; + vec_int4 x2n_v = (vec_int4){ x8n, x9n, x10n, x11n }; + + vec_int4 x0d_v = (vec_int4){ x0d, x1d, x2d, x3d }; + vec_int4 x1d_v = (vec_int4){ x4d, x5d, x6d, x7d }; + vec_int4 x2d_v = (vec_int4){ x8d, x9d, x10d, x11d }; + + vec_int4 x0q_v = (vec_int4){ x0q, x1q, x2q, x3q }; + vec_int4 x1q_v = (vec_int4){ x4q, x5q, x6q, x7q }; + vec_int4 x2q_v = (vec_int4){ x8q, x9q, x10q, x11q }; + + vec_int4 x0r_v = (vec_int4){ x0r, x1r, x2r, x3r }; + vec_int4 x1r_v = (vec_int4){ x4r, x5r, x6r, x7r }; + vec_int4 x2r_v = (vec_int4){ x8r, x9r, x10r, x11r }; + + divi4_t res; + + TEST_START("divi4"); + res = divi4(x0n_v, x0d_v); + TEST_CHECK("20040928161846EJL", allequal_int4( res.quot, x0q_v ) && allequal_int4( res.rem, x0r_v ), 0); + res = divi4(x1n_v, x1d_v); + TEST_CHECK("20040928161851EJL", allequal_int4( res.quot, x1q_v ) && allequal_int4( res.rem, x1r_v ), 0); + res = divi4(x2n_v, x2d_v); + TEST_CHECK("20040928161855EJL", allequal_int4( res.quot, x2q_v ) && allequal_int4( res.rem, x2r_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/fabsf4.c b/Extras/simdmathlibrary/ppu/tests/fabsf4.c new file mode 100644 index 000000000..4864e46a9 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/fabsf4.c @@ -0,0 +1,85 @@ +/* Testcase for fabsf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + +int main() +{ + TEST_SET_START("20040915032605EJL","EJL", "fabsf"); + + unsigned int i3n = 0xff000000; + unsigned int i3p = 0x7f000000; + + float x0n = hide_float(-0.0f); + float x0p = hide_float(0.0f); + float x1n = hide_float(-83532.96153153f); + float x1p = hide_float(83532.96153153f); + float x2n = hide_float(-0.0000000013152f); + float x2p = hide_float(0.0000000013152f); + float x3n = hide_float(make_float(i3n)); + float x3p = hide_float(make_float(i3p)); + + vec_float4 x0n_v = vec_splat_float(x0n); + vec_float4 x0p_v = vec_splat_float(x0p); + vec_float4 x1n_v = vec_splat_float(x1n); + vec_float4 x1p_v = vec_splat_float(x1p); + vec_float4 x2n_v = vec_splat_float(x2n); + vec_float4 x2p_v = vec_splat_float(x2p); + vec_float4 x3n_v = vec_splat_float(x3n); + vec_float4 x3p_v = vec_splat_float(x3p); + + vec_float4 res_v; + + TEST_START("fabsf4"); + res_v = fabsf4(x0n_v); + TEST_CHECK("20040915032618EJL", allequal_float4( res_v, x0p_v ), 0); + res_v = fabsf4(x0p_v); + TEST_CHECK("20040915032632EJL", allequal_float4( res_v, x0p_v ), 0); + res_v = fabsf4(x1n_v); + TEST_CHECK("20040915032643EJL", allequal_float4( res_v, x1p_v ), 0); + res_v = fabsf4(x1p_v); + TEST_CHECK("20040915032654EJL", allequal_float4( res_v, x1p_v ), 0); + res_v = fabsf4(x2n_v); + TEST_CHECK("20040915032704EJL", allequal_float4( res_v, x2p_v ), 0); + res_v = fabsf4(x2p_v); + TEST_CHECK("20040915032712EJL", allequal_float4( res_v, x2p_v ), 0); + res_v = fabsf4(x3n_v); + TEST_CHECK("20040915032719EJL", allequal_float4( res_v, x3p_v ), 0); + res_v = fabsf4(x3p_v); + TEST_CHECK("20040915032729EJL", allequal_float4( res_v, x3p_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h b/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h new file mode 100644 index 000000000..0d6abd6f8 --- /dev/null +++ 
b/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h @@ -0,0 +1,189 @@ +/* Header file for common parts of the testsuite + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _FLOATINGPOINT_TESTS_H_ +#define _FLOATINGPOINT_TESTS_H_ + +#if defined(__PPC__) + #include + #define vec_uchar16 vector unsigned char + #define vec_char16 vector signed char + #define vec_ushort8 vector unsigned short + #define vec_short8 vector signed short + #define vec_uint4 vector unsigned int + #define vec_int4 vector signed int + #define vec_ullong2 vector unsigned long long + #define vec_llong2 vector signed long long + #define vec_float4 vector float + #define vec_double2 vector double +#else + #if __SPU__ + #include + #endif +#endif + +// To avoid type punning warnings (for printing in hex notation, doing bit-diff etc) +typedef union { + double d; + unsigned char uc[8]; + unsigned int ui[2]; + unsigned long long int ull; +} sce_math_alt_double; + +typedef union { + float f; + unsigned char uc[4]; + unsigned int ui; +} sce_math_alt_float; + +#if (__PPC__ || __SPU__) +typedef union { + vec_int4 vsi; + int si[4]; +} sce_math_alt_vec_int4; + +typedef union { + vec_uint4 vui; + int ui[4]; +} sce_math_alt_vec_uint4; + +typedef union { + vec_float4 vf; + float sf[4]; + unsigned int ui[4]; +} sce_math_alt_vec_float4; +#endif +#if __SPU__ + typedef union { + double sd[2]; + vec_double2 vd; + unsigned long long int ui[2]; + } sce_math_alt_vec_double2; +#endif + +#if __PPC__ + inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = vec_sub(refi, valsi); + vec_int4 negdiff = vec_sub(((vec_int4){0,0,0,0}), diff); + + return vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) )); + } + inline int bitdiff(float ref, float val) { + sce_math_alt_float aref, aval; + aref.f = ref; + aval.f = val; + int diff = aref.ui - aval.ui; + return (diff>0)?diff:-diff; + } + inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = vec_sub(refi, valsi); + vec_int4 negdiff = 
vec_sub(((vec_int4){0,0,0,0}), diff); + + diff = vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) )); + vec_float4 logdiff = vec_loge(vec_ctf(diff,0)); + return vec_sub(((vec_int4){32,32,32,32}), vec_cts(vec_ceil(logdiff),0)); + } + inline int bitmatch(float ref, float val) { + sce_math_alt_vec_float4 aref, aval; + sce_math_alt_vec_int4 adiff; + aref.sf[0] = ref; + aval.sf[0] = val; + adiff.vsi = bitmatch4(aref.vf, aval.vf); + return adiff.si[0]; + } + inline float extractFloat(vec_float4 vf, int index) + { + sce_math_alt_vec_float4 vec; + vec.vf = vf; + return vec.sf[index]; + } + inline int extractInt(vec_int4 vi, int index) + { + sce_math_alt_vec_int4 vec; + vec.vsi = vi; + return vec.si[index]; + } + inline int extractUInt(vec_uint4 vi, int index) + { + sce_math_alt_vec_uint4 vec; + vec.vui = vi; + return vec.ui[index]; + } +#else + #if __SPU__ + inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = spu_sub(refi, valsi); + vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); + + return spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)); + } + inline int bitdiff(float ref, float val) { + return spu_extract(bitdiff4(spu_promote(ref,0), spu_promote(val,0)), 0); + } + inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = spu_sub(refi, valsi); + vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); + + return (vec_int4)spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0))); + } + inline int bitmatch(float ref, float val) { + return spu_extract(bitmatch4(spu_promote(ref,0), spu_promote(val,0)), 0); + } + + #else + inline int bitdiff(sce_math_alt_float ref, sce_math_alt_float val) { + int diff = ref.ui - val.ui; + return((diff>0)?diff:-diff); + } + inline int bitmatch(sce_math_alt_float ref, sce_math_alt_float val) { + int diff, i; + unsigned int udiff; + 
diff = ref.ui - val.ui; + udiff = (diff>0) ? diff : -diff; + i = 32; + while(udiff != 0) { + i = i-1; + udiff = udiff >> 1; + } + return udiff; + } + #endif // __SPU__ +#endif // __PPC__ + + +#endif // _FLOATINGPOINT_TESTS_H_ diff --git a/Extras/simdmathlibrary/ppu/tests/floorf4.c b/Extras/simdmathlibrary/ppu/tests/floorf4.c new file mode 100644 index 000000000..26435ea35 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/floorf4.c @@ -0,0 +1,92 @@ +/* Testcase for floorf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040916145017EJL","EJL", "floorf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. + + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(-1.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79616.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = vec_splat_float(x0); + vec_float4 x0i_v = vec_splat_float(x0i); + vec_float4 x1_v = vec_splat_float(x1); + vec_float4 x1i_v = vec_splat_float(x1i); + vec_float4 x2_v = vec_splat_float(x2); + vec_float4 x2i_v = vec_splat_float(x2i); + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3i_v = vec_splat_float(x3i); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4i_v = vec_splat_float(x4i); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5i_v = vec_splat_float(x5i); + + vec_float4 res_v; + + TEST_START("floorf4"); + res_v = 
floorf4(x0_v); + TEST_CHECK("20040916145022EJL", allequal_float4( res_v, x0i_v ), 0); + res_v = floorf4(x1_v); + TEST_CHECK("20040916145024EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = floorf4(x2_v); + TEST_CHECK("20040916145027EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = floorf4(x3_v); + TEST_CHECK("20040916145029EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = floorf4(x4_v); + TEST_CHECK("20040916145032EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = floorf4(x5_v); + TEST_CHECK("20040916145034EJL", allequal_float4( res_v, x5i_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/fminf4_fmaxf4.c b/Extras/simdmathlibrary/ppu/tests/fminf4_fmaxf4.c new file mode 100644 index 000000000..5c45fa4f7 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/fminf4_fmaxf4.c @@ -0,0 +1,91 @@ +/* Testcase for fminf4 and fmaxf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + +int main() +{ + TEST_SET_START("20040928184342EJL","EJL", "fminf4_fmaxf4"); + + float x0min = hide_float(1760.135f); + float x0max = hide_float(19355.03f); + + float x1min = hide_float(-12351.9f); + float x1max = hide_float(-139.035f); + + float x2min = hide_float(-1.0); + float x2max = hide_float(0.0); + + vec_float4 x0min_v = vec_splat_float(x0min); + vec_float4 x0max_v = vec_splat_float(x0max); + + vec_float4 x1min_v = vec_splat_float(x1min); + vec_float4 x1max_v = vec_splat_float(x1max); + + vec_float4 x2min_v = vec_splat_float(x2min); + vec_float4 x2max_v = vec_splat_float(x2max); + + vec_float4 res_v; + + TEST_START("fminf4"); + res_v = fminf4(x0min_v, x0max_v); + TEST_CHECK("20040928184345EJL", allequal_float4( res_v, x0min_v ), 0); + res_v = fminf4(x0max_v, x0min_v); + TEST_CHECK("20040928184349EJL", allequal_float4( res_v, x0min_v ), 0); + res_v = fminf4(x1min_v, x1max_v); + TEST_CHECK("20040928184351EJL", allequal_float4( res_v, x1min_v ), 0); + res_v = fminf4(x1max_v, x1min_v); + TEST_CHECK("20040928184353EJL", allequal_float4( res_v, x1min_v ), 0); + res_v = fminf4(x2min_v, x2max_v); + TEST_CHECK("20040928184354EJL", allequal_float4( res_v, x2min_v ), 0); + res_v = fminf4(x2max_v, x2min_v); + TEST_CHECK("20040928184356EJL", allequal_float4( res_v, x2min_v ), 0); + + TEST_START("fmaxf4"); + res_v = fmaxf4(x0min_v, x0max_v); + 
TEST_CHECK("20040928184411EJL", allequal_float4( res_v, x0max_v ), 0); + res_v = fmaxf4(x0max_v, x0min_v); + TEST_CHECK("20040928184413EJL", allequal_float4( res_v, x0max_v ), 0); + res_v = fmaxf4(x1min_v, x1max_v); + TEST_CHECK("20040928184415EJL", allequal_float4( res_v, x1max_v ), 0); + res_v = fmaxf4(x1max_v, x1min_v); + TEST_CHECK("20040928184416EJL", allequal_float4( res_v, x1max_v ), 0); + res_v = fmaxf4(x2min_v, x2max_v); + TEST_CHECK("20040928184417EJL", allequal_float4( res_v, x2max_v ), 0); + res_v = fmaxf4(x2max_v, x2min_v); + TEST_CHECK("20040928184419EJL", allequal_float4( res_v, x2max_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/fmodf4.c b/Extras/simdmathlibrary/ppu/tests/fmodf4.c new file mode 100644 index 000000000..9574e88fe --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/fmodf4.c @@ -0,0 +1,129 @@ +/* Testcase for fmodf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + + +int main() +{ + TEST_SET_START("20040928191240EJL","EJL", "fmodf4"); + + unsigned int i0n = 0x449edbc6; + unsigned int i0d = 0x40cf799d; + unsigned int i0r = 0x3daa7300; + unsigned int i1n = 0x6bca107a; + unsigned int i1d = 0x6c4a107a; + unsigned int i1r = 0x6bca107a; + unsigned int i2n = 0x1c123605; + unsigned int i2d = 0x1c923602; + unsigned int i2r = 0x1c123605; + unsigned int i3n = 0x2b4c50fa; + unsigned int i3d = 0x253a3ae3; + unsigned int i3r = 0x25141df9; + unsigned int i4n = 0x73addffc; + unsigned int i4d = 0x742ddffc; + unsigned int i4r = 0x73addffc; + unsigned int i5n = 0x29d4d97c; + unsigned int i5d = 0x2a546e77; + unsigned int i5r = 0x29d4d97c; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); + float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = 
hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = vec_splat_float(x0n); + vec_float4 x0d_v = vec_splat_float(x0d); + vec_float4 x0r_v = vec_splat_float(x0r); + + vec_float4 x1n_v = vec_splat_float(x1n); + vec_float4 x1d_v = vec_splat_float(x1d); + vec_float4 x1r_v = vec_splat_float(x1r); + + vec_float4 x2n_v = vec_splat_float(x2n); + vec_float4 x2d_v = vec_splat_float(x2d); + vec_float4 x2r_v = vec_splat_float(x2r); + + vec_float4 x3n_v = vec_splat_float(x3n); + vec_float4 x3d_v = vec_splat_float(x3d); + vec_float4 x3r_v = vec_splat_float(x3r); + + vec_float4 x4n_v = vec_splat_float(x4n); + vec_float4 x4d_v = vec_splat_float(x4d); + vec_float4 x4r_v = vec_splat_float(x4r); + + vec_float4 x5n_v = vec_splat_float(x5n); + vec_float4 x5d_v = vec_splat_float(x5d); + vec_float4 x5r_v = vec_splat_float(x5r); + + vec_float4 res_v; + + TEST_START("fmodf4"); + res_v = fmodf4(x0n_v, x0d_v); + TEST_CHECK("20040928191245EJL", allequal_ulps_float4( res_v, x0r_v, 1 ), 0); + res_v = fmodf4(x1n_v, x1d_v); + TEST_CHECK("20040928191247EJL", allequal_ulps_float4( res_v, x1r_v, 1 ), 0); + res_v = fmodf4(x2n_v, x2d_v); + TEST_CHECK("20040928191249EJL", allequal_ulps_float4( res_v, x2r_v, 1 ), 0); + res_v = fmodf4(x3n_v, x3d_v); + TEST_CHECK("20040928191251EJL", allequal_ulps_float4( res_v, x3r_v, 1 ), 0); + res_v = fmodf4(x4n_v, x4d_v); + TEST_CHECK("20040928191253EJL", allequal_ulps_float4( res_v, x4r_v, 1 ), 0); + res_v = fmodf4(x5n_v, x5d_v); + TEST_CHECK("20040928191255EJL", allequal_ulps_float4( res_v, x5r_v, 1 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/modff4.c b/Extras/simdmathlibrary/ppu/tests/modff4.c new file mode 100644 index 000000000..1379fa215 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/modff4.c @@ -0,0 +1,108 @@ +/* Testcase for modff4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + +int main() +{ + TEST_SET_START("20040916170642EJL", "EJL", "modff"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. 
+ + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x0f = hide_float(0.91825f); + + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x1f = hide_float(-0.12958f); + + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x2f = hide_float(-0.1875f); + + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x3f = hide_float(0.5f); + + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x4f = hide_float(0.0f); + + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + float x5f = hide_float(0.0f); + + vec_float4 x0_v = vec_splat_float(x0); + vec_float4 x0i_v = vec_splat_float(x0i); + vec_float4 x0f_v = vec_splat_float(x0f); + vec_float4 x1_v = vec_splat_float(x1); + vec_float4 x1i_v = vec_splat_float(x1i); + vec_float4 x1f_v = vec_splat_float(x1f); + vec_float4 x2_v = vec_splat_float(x2); + vec_float4 x2i_v = vec_splat_float(x2i); + vec_float4 x2f_v = vec_splat_float(x2f); + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3i_v = vec_splat_float(x3i); + vec_float4 x3f_v = vec_splat_float(x3f); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4i_v = vec_splat_float(x4i); + vec_float4 x4f_v = vec_splat_float(x4f); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5i_v = vec_splat_float(x5i); + vec_float4 x5f_v = vec_splat_float(x5f); + + vec_float4 integer_v, fraction_v; + + TEST_START("modff4"); + fraction_v = modff4(x0_v, &integer_v); + TEST_CHECK("20040916170647EJL", allequal_float4( integer_v, x0i_v ) && allequal_float4( fraction_v, x0f_v ), 0); + fraction_v = modff4(x1_v, &integer_v); + TEST_CHECK("20040916170650EJL", allequal_float4( integer_v, x1i_v ) && allequal_float4( fraction_v, x1f_v ), 0); + fraction_v = modff4(x2_v, &integer_v); + TEST_CHECK("20040916170653EJL", allequal_float4( integer_v, x2i_v ) && allequal_float4( fraction_v, x2f_v ), 0); + fraction_v = 
modff4(x3_v, &integer_v); + TEST_CHECK("20040916170656EJL", allequal_float4( integer_v, x3i_v ) && allequal_float4( fraction_v, x3f_v ), 0); + fraction_v = modff4(x4_v, &integer_v); + TEST_CHECK("20040916170658EJL", allequal_float4( integer_v, x4i_v ) && allequal_float4( fraction_v, x4f_v ), 0); + fraction_v = modff4(x5_v, &integer_v); + TEST_CHECK("20040916170701EJL", allequal_float4( integer_v, x5i_v ) && allequal_float4( fraction_v, x5f_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/negatef4.c b/Extras/simdmathlibrary/ppu/tests/negatef4.c new file mode 100644 index 000000000..c0f809f01 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/negatef4.c @@ -0,0 +1,86 @@ +/* Testcase for negatef4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040930102649EJL","EJL", "negatef4"); + + unsigned int i3n = 0xff000000; + unsigned int i3p = 0x7f000000; + + float x0n = hide_float(-0.0f); + float x0p = hide_float(0.0f); + float x1n = hide_float(-83532.96153153f); + float x1p = hide_float(83532.96153153f); + float x2n = hide_float(-0.0000000013152f); + float x2p = hide_float(0.0000000013152f); + float x3n = hide_float(make_float(i3n)); + float x3p = hide_float(make_float(i3p)); + + vec_float4 x0n_v = vec_splat_float(x0n); + vec_float4 x0p_v = vec_splat_float(x0p); + vec_float4 x1n_v = vec_splat_float(x1n); + vec_float4 x1p_v = vec_splat_float(x1p); + vec_float4 x2n_v = vec_splat_float(x2n); + vec_float4 x2p_v = vec_splat_float(x2p); + vec_float4 x3n_v = vec_splat_float(x3n); + vec_float4 x3p_v = vec_splat_float(x3p); + + vec_float4 res_v; + + TEST_START("negatef4"); + res_v = negatef4(x0n_v); + TEST_CHECK("20040930102652EJL", allequal_float4( res_v, x0p_v ), 0); + res_v = negatef4(x0p_v); + TEST_CHECK("20040930102653EJL", allequal_float4( res_v, x0n_v ), 0); + res_v = negatef4(x1n_v); + TEST_CHECK("20040930102655EJL", allequal_float4( res_v, x1p_v ), 0); + res_v = negatef4(x1p_v); + TEST_CHECK("20040930102657EJL", allequal_float4( res_v, x1n_v ), 0); + res_v = negatef4(x2n_v); + TEST_CHECK("20040930102659EJL", allequal_float4( res_v, x2p_v ), 0); + 
res_v = negatef4(x2p_v); + TEST_CHECK("20040930102701EJL", allequal_float4( res_v, x2n_v ), 0); + res_v = negatef4(x3n_v); + TEST_CHECK("20040930102703EJL", allequal_float4( res_v, x3p_v ), 0); + res_v = negatef4(x3p_v); + TEST_CHECK("20040930102705EJL", allequal_float4( res_v, x3n_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/negatei4.c b/Extras/simdmathlibrary/ppu/tests/negatei4.c new file mode 100644 index 000000000..ad78aa9b5 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/negatei4.c @@ -0,0 +1,83 @@ +/* Testcase for negatei4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040930102649EJL","EJL", "negatei4"); + + int x0n = hide_int(0); + int x0p = hide_int(0); + int x1n = hide_int(-83532); + int x1p = hide_int(83532); + int x2n = hide_int(-13152); + int x2p = hide_int(13152); + int x3n = hide_int(-1); + int x3p = hide_int(1); + + vec_int4 x0n_v = vec_splat_int(x0n); + vec_int4 x0p_v = vec_splat_int(x0p); + vec_int4 x1n_v = vec_splat_int(x1n); + vec_int4 x1p_v = vec_splat_int(x1p); + vec_int4 x2n_v = vec_splat_int(x2n); + vec_int4 x2p_v = vec_splat_int(x2p); + vec_int4 x3n_v = vec_splat_int(x3n); + vec_int4 x3p_v = vec_splat_int(x3p); + + vec_int4 res_v; + + TEST_START("negatei4"); + res_v = negatei4(x0n_v); + TEST_CHECK("20040930102652EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = negatei4(x0p_v); + TEST_CHECK("20040930102653EJL", allequal_int4( res_v, x0n_v ), 0); + res_v = negatei4(x1n_v); + TEST_CHECK("20040930102655EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = negatei4(x1p_v); + TEST_CHECK("20040930102657EJL", allequal_int4( res_v, x1n_v ), 0); + res_v = negatei4(x2n_v); + TEST_CHECK("20040930102659EJL", allequal_int4( res_v, x2p_v ), 0); + res_v = negatei4(x2p_v); + TEST_CHECK("20040930102701EJL", allequal_int4( res_v, x2n_v ), 0); + res_v = negatei4(x3n_v); + TEST_CHECK("20040930102703EJL", allequal_int4( res_v, x3p_v ), 0); + res_v = negatei4(x3p_v); + 
TEST_CHECK("20040930102705EJL", allequal_int4( res_v, x3n_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/recipf4.c b/Extras/simdmathlibrary/ppu/tests/recipf4.c new file mode 100644 index 000000000..52f1e5640 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/recipf4.c @@ -0,0 +1,108 @@ +/* Testcase for recipf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040920142553EJL","EJL", "recipf4"); + + unsigned int i1 = 0xff000000; // -2^127 + unsigned int i2 = 0xfe7fffff; // -2^126 - 1 ulp + unsigned int i2r = 0x80800001; + unsigned int i3 = 0x75013340; // random values + unsigned int i3r = 0x09fd9f35; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x090d9277; + unsigned int i5 = 0x4c7fed5a; + unsigned int i5r = 0x32800954; + unsigned int i6 = 0x3a0731f0; + unsigned int i6r = 0x44f2602e; + unsigned int i7 = 0x69784a07; + unsigned int i7r = 0x1583f9a3; + + float x1 = hide_float(make_float(i1)); + float x1r = hide_float(0.0f); + float x2 = hide_float(make_float(i2)); + float x2r = hide_float(make_float(i2r)); + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x1_v = vec_splat_float(x1); + vec_float4 x1r_v = vec_splat_float(x1r); + vec_float4 x2_v = vec_splat_float(x2); + vec_float4 x2r_v = vec_splat_float(x2r); + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3r_v = vec_splat_float(x3r); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4r_v = vec_splat_float(x4r); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5r_v = vec_splat_float(x5r); + vec_float4 x6_v = vec_splat_float(x6); + vec_float4 x6r_v = vec_splat_float(x6r); + vec_float4 x7_v = vec_splat_float(x7); + vec_float4 x7r_v = vec_splat_float(x7r); + + vec_float4 res_v; + + TEST_START("recipf4"); + res_v = recipf4(x1_v); + TEST_CHECK("20040920142600EJL", allequal_float4( res_v, x1r_v), 0); + res_v = recipf4(x2_v); + 
TEST_CHECK("20040920142602EJL", allequal_ulps_float4( res_v, x2r_v, 2 ), 0); + res_v = recipf4(x3_v); + TEST_CHECK("20040920142604EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = recipf4(x4_v); + TEST_CHECK("20040920142606EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = recipf4(x5_v); + TEST_CHECK("20040920142608EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = recipf4(x6_v); + TEST_CHECK("20040920142609EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = recipf4(x7_v); + TEST_CHECK("20040920142611EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/rsqrtf4.c b/Extras/simdmathlibrary/ppu/tests/rsqrtf4.c new file mode 100644 index 000000000..1a8801e2c --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/rsqrtf4.c @@ -0,0 +1,95 @@ +/* Testcase for rsqrtf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + + + +int main() +{ + TEST_SET_START("20040928182349EJL","EJL", "rsqrtf4"); + + unsigned int i3 = 0x742c4455; + unsigned int i3r = 0x251c099a; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x243e5fe2; + unsigned int i5 = 0x4baa9e3c; + unsigned int i5r = 0x395dbbeb; + unsigned int i6 = 0x39344296; + unsigned int i6r = 0x429889eb; + unsigned int i7 = 0x68a586b0; + unsigned int i7r = 0x2ae11e67; + + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3r_v = vec_splat_float(x3r); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4r_v = vec_splat_float(x4r); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5r_v = vec_splat_float(x5r); + vec_float4 x6_v = vec_splat_float(x6); + vec_float4 x6r_v = vec_splat_float(x6r); + vec_float4 x7_v = vec_splat_float(x7); + vec_float4 x7r_v = vec_splat_float(x7r); + + vec_float4 res_v; + + TEST_START("rsqrtf4"); + res_v = rsqrtf4(x3_v); + TEST_CHECK("20040928182352EJL", 
allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = rsqrtf4(x4_v); + TEST_CHECK("20040928182355EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = rsqrtf4(x5_v); + TEST_CHECK("20040928182357EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = rsqrtf4(x6_v); + TEST_CHECK("20040928182358EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = rsqrtf4(x7_v); + TEST_CHECK("20040928182401EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/sqrtf4.c b/Extras/simdmathlibrary/ppu/tests/sqrtf4.c new file mode 100644 index 000000000..70bee97b2 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/sqrtf4.c @@ -0,0 +1,100 @@ +/* Testcase for sqrtf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040928182549EJL","EJL", "sqrtf4"); + + unsigned int i3 = 0x742c4455; + unsigned int i3r = 0x59d20034; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x5aac1fb5; + unsigned int i5 = 0x4baa9e3c; + unsigned int i5r = 0x4593c7d8; + unsigned int i6 = 0x39344296; + unsigned int i6r = 0x3c56d14c; + unsigned int i7 = 0x68a586b0; + unsigned int i7r = 0x54118f09; + + float x0 = hide_float(0.0f); + float x0r = hide_float(0.0f); + + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x0_v = vec_splat_float(x0); + vec_float4 x0r_v = vec_splat_float(x0r); + + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3r_v = vec_splat_float(x3r); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4r_v = vec_splat_float(x4r); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5r_v = vec_splat_float(x5r); + vec_float4 x6_v = vec_splat_float(x6); + vec_float4 x6r_v = vec_splat_float(x6r); + vec_float4 x7_v = vec_splat_float(x7); + 
vec_float4 x7r_v = vec_splat_float(x7r); + + vec_float4 res_v; + + TEST_START("sqrtf4"); + res_v = sqrtf4(x0_v); + TEST_CHECK("20040928182551EJL", allequal_float4( res_v, x0r_v ), 0); + res_v = sqrtf4(x3_v); + TEST_CHECK("20040928182552EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = sqrtf4(x4_v); + TEST_CHECK("20040928182554EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = sqrtf4(x5_v); + TEST_CHECK("20040928182556EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = sqrtf4(x6_v); + TEST_CHECK("20040928182557EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = sqrtf4(x7_v); + TEST_CHECK("20040928182559EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/tests/testutils.c b/Extras/simdmathlibrary/ppu/tests/testutils.c new file mode 100644 index 000000000..f5bfa3b3b --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/testutils.c @@ -0,0 +1,67 @@ +/* Common part of the testsuite + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +unsigned int +hide_uint( unsigned int x ) +{ + return x; +} + +int +hide_int( int x ) +{ + return x; +} + +float +hide_float( float x ) +{ + return x; +} + +double +hide_double( double x ) +{ + return x; +} + +float +make_float( unsigned int x ) +{ + + union + { + unsigned int i; + float f; + }fi; + fi.i = x; + return fi.f; +} + + diff --git a/Extras/simdmathlibrary/ppu/tests/testutils.h b/Extras/simdmathlibrary/ppu/tests/testutils.h new file mode 100644 index 000000000..c7cf78b11 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/testutils.h @@ -0,0 +1,100 @@ +/* Header file for common parts of the testsuite + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +/* FIXME: Don't use altivec style initializers. 
*/ + +#ifndef _TESTUTILS_H_ + +#include "floatingpoint_tests.h" + +extern unsigned int hide_uint( unsigned int x ); +extern int hide_int( int x ); +extern float hide_float( float x ); +extern float make_float( unsigned int x ); + +inline vec_int4 vec_splat_int( int x ) +{ + return (vec_int4){x, x, x, x}; +} + +inline vec_float4 vec_splat_float( float x ) +{ + return (vec_float4){x, x, x, x}; +} + +inline vec_uint4 bitDiff_f4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = vec_sub( refi, valsi ); + vec_int4 negdiff = vec_sub( vec_splat_int(0), diff ); + vec_uint4 lz; + + diff = vec_sel( negdiff, diff, vec_cmpgt( diff, vec_splat_int(0) ) ); + + lz = vec_sub( (vec_uint4)vec_splat_int(158), vec_sr( (vec_uint4)vec_ctf( diff, 0 ), (vec_uint4)vec_splat_int(23) ) ); + lz = vec_sel( lz, (vec_uint4)vec_splat_int(32), vec_cmpeq( diff, vec_splat_int(0) ) ); + + return vec_sub( (vec_uint4)vec_splat_int(32), lz ); +} + +inline vec_uint4 ulpDiff_f4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = vec_sub( refi, valsi ); + vec_int4 negdiff = vec_sub( vec_splat_int(0), diff ); + + return (vec_uint4)( vec_sel( negdiff, diff, vec_cmpgt( diff, vec_splat_int(0) ) ) ); +} + +inline int allequal_int4( vec_int4 x, vec_int4 y ) +{ + return ( vec_all_eq( x, y ) ); +} + +inline int allequal_float4( vec_float4 x, vec_float4 y ) +{ + return ( vec_all_eq( x, y ) ); +} + +inline int allequal_ulps_float4( vec_float4 x, vec_float4 y, int tolerance ) +{ + vec_uint4 vtol = (vec_uint4)vec_splat_int( tolerance ); + vec_uint4 ulps = ulpDiff_f4( x, y ); + return vec_all_le( ulps, vtol ); +} + +inline int allequal_bits_float4( vec_float4 x, vec_float4 y, int tolerance ) +{ + vec_uint4 vtol = (vec_uint4)vec_splat_int( tolerance ); + vec_uint4 bits = bitDiff_f4( x, y ); + return vec_all_le( bits, vtol ); +} + +#endif diff --git 
a/Extras/simdmathlibrary/ppu/tests/truncf4.c b/Extras/simdmathlibrary/ppu/tests/truncf4.c new file mode 100644 index 000000000..72444138f --- /dev/null +++ b/Extras/simdmathlibrary/ppu/tests/truncf4.c @@ -0,0 +1,93 @@ +/* Testcase for truncf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "common-test.h" +#include "testutils.h" +#include "simdmath.h" + + +int main() +{ + TEST_SET_START("20040916100012EJL","EJL", "truncf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. + + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = vec_splat_float(x0); + vec_float4 x0i_v = vec_splat_float(x0i); + vec_float4 x1_v = vec_splat_float(x1); + vec_float4 x1i_v = vec_splat_float(x1i); + vec_float4 x2_v = vec_splat_float(x2); + vec_float4 x2i_v = vec_splat_float(x2i); + vec_float4 x3_v = vec_splat_float(x3); + vec_float4 x3i_v = vec_splat_float(x3i); + vec_float4 x4_v = vec_splat_float(x4); + vec_float4 x4i_v = vec_splat_float(x4i); + vec_float4 x5_v = vec_splat_float(x5); + vec_float4 x5i_v = vec_splat_float(x5i); + + vec_float4 res_v; + + TEST_START("truncf4"); + res_v = truncf4(x0_v); + TEST_CHECK("20040916100023EJL", allequal_float4( res_v, x0i_v ), 0); + res_v = truncf4(x1_v); + TEST_CHECK("20040916100034EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = truncf4(x2_v); + TEST_CHECK("20040916100043EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = truncf4(x3_v); + TEST_CHECK("20040916100054EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = truncf4(x4_v); + TEST_CHECK("20040916100103EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = truncf4(x5_v); + TEST_CHECK("20040916100111EJL", 
allequal_float4( res_v, x5i_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/ppu/truncf4.c b/Extras/simdmathlibrary/ppu/truncf4.c new file mode 100644 index 000000000..664bd7074 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/truncf4.c @@ -0,0 +1,39 @@ +/* truncf4 - for each of four float slots, round towards zero to integer value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +vector float +truncf4 (vector float x) +{ + return vec_trunc( x ); +} + diff --git a/Extras/simdmathlibrary/simdmath.h b/Extras/simdmathlibrary/simdmath.h new file mode 100644 index 000000000..c50b241f8 --- /dev/null +++ b/Extras/simdmathlibrary/simdmath.h @@ -0,0 +1,725 @@ +/* SIMD math library functions for both the PowerPC (PPU) and the SPU. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#ifndef ___SIMD_MATH_H____ +#define ___SIMD_MATH_H____ + +#define SIMD_MATH_HAVE_VECTOR_f4 0 +#define SIMD_MATH_HAVE_VECTOR_i4 0 +#define SIMD_MATH_HAVE_VECTOR_d2 0 +#define SIMD_MATH_HAVE_VECTOR_ll2 0 + +#ifdef __SPU__ + +/* SPU has vector float, vector double, + vector {un,}signed long long, and vector {un,signed} int. */ + +#undef SIMD_MATH_HAVE_VECTOR_f4 +#define SIMD_MATH_HAVE_VECTOR_f4 1 + +#undef SIMD_MATH_HAVE_VECTOR_i4 +#define SIMD_MATH_HAVE_VECTOR_i4 1 + +#undef SIMD_MATH_HAVE_VECTOR_d2 +#define SIMD_MATH_HAVE_VECTOR_d2 1 + +#undef SIMD_MATH_HAVE_VECTOR_ll2 +#define SIMD_MATH_HAVE_VECTOR_ll2 1 + +#elif defined(__ALTIVEC__) + +#include + +/* PPU has vector float, and vector int. */ +#undef SIMD_MATH_HAVE_VECTOR_f4 +#define SIMD_MATH_HAVE_VECTOR_f4 1 + +#undef SIMD_MATH_HAVE_VECTOR_i4 +#define SIMD_MATH_HAVE_VECTOR_i4 1 + +#else + +/* Just in case someone tries to include this in say a i686 build. */ + +#error "No functions defined" + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Types */ + +#if SIMD_MATH_HAVE_VECTOR_i4 +typedef struct divi4_s { + vector signed int quot; + vector signed int rem; +} divi4_t; +#endif + +#if SIMD_MATH_HAVE_VECTOR_i4 +typedef struct divu4_s { + vector unsigned int quot; + vector unsigned int rem; +} divu4_t; +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 +typedef struct lldivi2_s { + vector signed long long quot; + vector signed long long rem; +} lldivi2_t; +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 +typedef struct lldivu2_s { + vector unsigned long long quot; + vector unsigned long long rem; +} lldivu2_t; +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +typedef struct llroundf4_s { + vector signed long long vll[2]; +} llroundf4_t; +#endif + +/* integer divide */ + +#if SIMD_MATH_HAVE_VECTOR_i4 +divi4_t divi4 (vector signed int, vector signed int); +#endif + +#if SIMD_MATH_HAVE_VECTOR_i4 +divu4_t divu4 (vector unsigned int, vector unsigned int); +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 
+lldivi2_t lldivi2 (vector signed long long, vector signed long long); +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 +lldivu2_t lldivu2 (vector unsigned long long, vector unsigned long long); +#endif + +/* abs value */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fabsf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fabsd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_i4 +vector signed int absi4 (vector signed int); +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 +vector signed long long llabsi2 (vector signed long long); +#endif + +/* negate value */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float negatef4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double negated2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_i4 +vector signed int negatei4 (vector signed int); +#endif + +#if SIMD_MATH_HAVE_VECTOR_ll2 +vector signed long long negatell2 (vector signed long long); +#endif + +/* trunc */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float truncf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double trund2 (vector double); +#endif + +/* floor */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float floorf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double floord2 (vector double); +#endif + +/* ceil */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float ceilf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double ceild2 (vector double); +#endif + +/* exp */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float expf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double expd2 (vector double); +#endif + +/* exp */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float exp2f4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double exp2d2 (vector double); +#endif + +/* expm1 */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float expm1f4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double expm1d2 (vector double); +#endif + +/* log */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float 
logf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double logd2 (vector double); +#endif + +/* log10 */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float log10f4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double log10d2 (vector double); +#endif + +/* log1p */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float log1pf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double log1pd2 (vector double); +#endif + +/* fma */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fmaf4 (vector float, vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fmad2 (vector double, vector double, vector double); +#endif + +/* fmax */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fmaxf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fmaxd2 (vector double, vector double); +#endif + +/* fmin */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fminf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fmind2 (vector double, vector double); +#endif + +/* fdim */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fdimf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fdimd2 (vector double, vector double); +#endif + + +/* fmod */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float fmodf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fmodd2 (vector double, vector double); +#endif + +/* log2 */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float log2f4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double log2d2 (vector double); +#endif + +/* logb */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float logbf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double logbd2 (vector double); +#endif + +/* ilogb */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector signed int ilogbf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector signed 
long long ilogbd2 (vector double); +#endif + +/* modf */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float modff4 (vector float, vector float *); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double modfd2 (vector double, vector double *); +#endif + +/* sqrt */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float sqrtf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double sqrtd2 (vector double); +#endif + +/* hypot */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float hypotf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double hypotd2 (vector double, vector double); +#endif + +/* cbrtf4 */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float cbrtf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double cbrtd2 (vector double); +#endif + +/* sin */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float sinf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double sind2 (vector double); +#endif + + +/* asin */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float asinf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double asind2 (vector double); +#endif + + + +/* divide */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float divf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double divd2 (vector double, vector double); +#endif + +/* remainder */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float remainderf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double remainderd2 (vector double, vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector float remquof4(vector float x, vector float y, vector signed int *quo); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector double remquod2(vector double x, vector double y, vector signed long long *quo); +#endif + +/* copysign */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float copysignf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double 
copysignd2 (vector double, vector double); +#endif + +/* cos */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float cosf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double cosd2 (vector double); +#endif + +/* acos */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float acosf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double acosd2 (vector double); +#endif + +/* atan */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float atanf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double atand2 (vector double); +#endif + +/* atan2 */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float atan2f4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double atan2d2 (vector double, vector double); +#endif + + +/* tan */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float tanf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double tand2 (vector double); +#endif + +/* sincos */ +#if SIMD_MATH_HAVE_VECTOR_f4 +void sincosf4 (vector float, vector float *, vector float *); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +void sincosd2 (vector double, vector double *, vector double *); +#endif + + + +/* recip */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float recipf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double recipd2 (vector double); +#endif + + +/* rsqrt */ +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float rsqrtf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double rsqrtd2 (vector double); +#endif + + +/* frexp */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector float frexpf4 (vector float, vector signed int *); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector double frexpd2 (vector double, vector signed long long *); +#endif + +/* ldexp */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector float ldexpf4 (vector float, vector signed int ); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector double ldexpd2 (vector 
double, vector signed long long ); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector float scalbnf4(vector float x, vector signed int n); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector double scalbllnd2 (vector double, vector signed long long ); +#endif + + +/* isnan */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isnanf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isnand2 (vector double); +#endif + +/* isinf */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isinff4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isinfd2 (vector double); +#endif + +/* isfinite */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isfinitef4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isfinited2 (vector double); +#endif + +/* isnormal */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isnormalf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isnormald2 (vector double); +#endif + +/* isunordered */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isunorderedf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isunorderedd2 (vector double, vector double); +#endif + +/* is0denorm */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int is0denormf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long is0denormd2 (vector double); +#endif + +/* signbit */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int signbitf4 (vector float); 
+#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long signbitd2 (vector double); +#endif + +/* isequal */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isequalf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isequald2 (vector double, vector double); +#endif + +/* islessgreater */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int islessgreaterf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long islessgreaterd2 (vector double, vector double); +#endif + +/* isless */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int islessf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long islessd2 (vector double, vector double); +#endif + +/* isgreater */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isgreaterf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isgreaterd2 (vector double, vector double); +#endif + +/* islessequal */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int islessequalf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long islessequald2 (vector double, vector double); +#endif + +/* isgreaterequal */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector unsigned int isgreaterequalf4 (vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector unsigned long long isgreaterequald2 (vector double, vector double); +#endif + +/* fpclassify */ +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector signed int fpclassifyf4 (vector 
float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector signed long long fpclassifyd2 (vector double); +#endif + +/* round */ +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector signed long long llroundd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +llroundf4_t llroundf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +llroundf4_t llrintf4 (vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +vector signed long long llrintd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float roundf4(vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector signed int iroundf4(vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float rintf4(vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +vector signed int irintf4(vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double nextafterd2 (vector double, vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float nextafterf4(vector float, vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double nearbyintd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_f4 +vector float nearbyintf4(vector float); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double truncd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double roundd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double rintd2 (vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double ceild2(vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double floord2(vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double fmodd2(vector double, vector double); +#endif + +#if SIMD_MATH_HAVE_VECTOR_d2 +vector double remainderd2(vector double, vector double); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/Extras/simdmathlibrary/spu/Makefile b/Extras/simdmathlibrary/spu/Makefile new file mode 100644 index 000000000..c1b4d1813 --- /dev/null +++ b/Extras/simdmathlibrary/spu/Makefile @@ -0,0 +1,131 @@ +# make file to build the libsimdmath library for SPU +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ + +# All that you do to add a file is edit OBJS, the rest will just work + +prefix = /usr +DESTDIR = + +OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \ + is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \ + sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \ + rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \ + negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \ + cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \ + fdimf4.o fmodf4.o negatei4.o logf4.o log1pf4.o log10f4.o expm1f4.o \ + expf4.o divi4.o exp2f4.o powf4.o atanf4.o atan2f4.o acosf4.o ilogbf4.o ilogbd2.o \ + logbf4.o logbd2.o llroundd2.o llroundf4.o llrintf4.o isequalf4.o isequald2.o \ + islessgreaterf4.o islessgreaterd2.o islessf4.o islessd2.o isgreaterf4.o \ + isgreaterd2.o islessequalf4.o islessequald2.o isgreaterequalf4.o isgreaterequald2.o \ + isfinitef4.o isfinited2.o isnormalf4.o isnormald2.o isunorderedf4.o isunorderedd2.o \ + llrintd2.o roundf4.o rintf4.o irintf4.o iroundf4.o fmad2.o fmaxd2.o fmind2.o fdimd2.o \ + nextafterd2.o fpclassifyf4.o fpclassifyd2.o nearbyintd2.o nextafterf4.o nearbyintf4.o \ + llabsi2.o truncd2.o roundd2.o rintd2.o negatell2.o divu4.o modfd2.o lldivu2.o \ + ceild2.o floord2.o ldexpd2.o scalbnf4.o scalbllnd2.o lldivi2.o remquof4.o remquod2.o\ + fmodd2.o remainderd2.o + + +INCLUDES_SPU = -I../ + +CROSS_SPU = spu- +AR_SPU = $(CROSS_SPU)ar +CC_SPU = $(CROSS_SPU)gcc +CXX_SPU = $(CROSS_SPU)g++ +RANLIB_SPU = $(CROSS_SPU)ranlib +TEST_CMD_SPU = + +CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall + +INSTALL = install + +MAKE_DEFS = \ + prefix='$(prefix)' \ + DESTDIR='$(DESTDIR)' \ + LIB_BASE='$(LIB_BASE)' \ + LIB_NAME='$(LIB_NAME)' \ + STATIC_LIB='$(STATIC_LIB)' \ + CROSS_SPU='$(CROSS_SPU)' \ + AR_SPU='$(AR_SPU)' \ + CC_SPU='$(CC_SPU)' \ + CXX_SPU='$(CXX_SPU)' \ + RANLIB_SPU='$(RANLIB_SPU)' \ + 
TEST_CMD_SPU='$(TEST_CMD_SPU)' \ + INSTALL='$(INSTALL)' + +LIB_BASE = simdmath +LIB_NAME = lib$(LIB_BASE) +STATIC_LIB = $(LIB_NAME).a + +all: $(STATIC_LIB) + +$(STATIC_LIB): $(OBJS) + $(AR_SPU) cr $@ $(OBJS) + $(RANLIB_SPU) $@ + +install: $(STATIC_LIB) + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include + $(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/ + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib + $(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB) + +clean: + cd tests; $(MAKE) $(MAKE_DEFS) clean + rm -f $(OBJS) + rm -f $(STATIC_LIB) + +$(OBJS): ../simdmath.h + +check: $(STATIC_LIB) + cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check + + +# Some Objects have special header files. +sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h +lldivu2.o lldivi2.o : lldiv.h + + + +%.o: %.c + $(CC_SPU) $(CFLAGS_SPU) -c $< + +#---------- +# C++ +#---------- +%.o: %.C + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cpp + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cc + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cxx + $(CXX_SPU) $(CFLAGS_SPU) -c $< + diff --git a/Extras/simdmathlibrary/spu/absi4.c b/Extras/simdmathlibrary/spu/absi4.c new file mode 100644 index 000000000..fd6ee0d63 --- /dev/null +++ b/Extras/simdmathlibrary/spu/absi4.c @@ -0,0 +1,40 @@ +/* absi4 - for each of four integer slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector signed int +absi4 (vector signed int x) +{ + vec_int4 neg; + neg = spu_sub( 0, x ); + return spu_sel( neg, x, spu_cmpgt( x, -1 ) ); +} + diff --git a/Extras/simdmathlibrary/spu/acosf4.c b/Extras/simdmathlibrary/spu/acosf4.c new file mode 100644 index 000000000..296bf86a8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/acosf4.c @@ -0,0 +1,78 @@ +/* acosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +// +// Computes the inverse cosine of all four slots of x +// +vector float +acosf4 (vector float x) +{ + vec_float4 result, xabs; + vec_float4 t1; + vec_float4 xabs2, xabs4; + vec_float4 hi, lo; + vec_float4 neg, pos; + vec_uint4 select; + + xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1)); + select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31)); + + t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi amd lo). 
+ */ + xabs2 = spu_mul(xabs, xabs); + xabs4 = spu_mul(xabs2, xabs2); + hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f)); + hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f)); + hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f)); + lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f)); + lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f)); + lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f)); + + result = spu_madd(hi, xabs4, lo); + + /* Adjust the result if x is negactive. + */ + neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f)); + pos = spu_mul(t1, result); + + result = spu_sel(pos, neg, select); + + return result; +} + + diff --git a/Extras/simdmathlibrary/spu/asinf4.c b/Extras/simdmathlibrary/spu/asinf4.c new file mode 100644 index 000000000..460abdb53 --- /dev/null +++ b/Extras/simdmathlibrary/spu/asinf4.c @@ -0,0 +1,85 @@ +/* asinf4 - Computes the inverse sine of all four slots of x + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +asinf4 (vector float x) +{ + // positive = (x > 0) + // + vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f)); + + // gtHalf = (|x| > 0.5) + // + vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f)); + + // x = absf(x) + // + x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff)); + + + // if (x > 0.5) + // g = 0.5 - 0.5*x + // x = -2 * sqrtf(g) + // else + // g = x * x + // + vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf); + + x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf); + + // Compute the polynomials and take their ratio + // denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f + // num = x * g * (-0.504400557f * g + 0.933933258f) + // + vec_float4 denom = spu_add(g,spu_splats(-5.54846723f)); + vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f)); + denom = spu_madd(denom,g,spu_splats(5.603603363f)); + num = spu_mul(spu_mul(x,g),num); + + + // x = x + num / denom + // + x = spu_add(x,divf4(num,denom)); + + // if (x > 0.5) + // x = x + M_PI_2 + // + x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf); + + + // if (!positive) x = -x + // + x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive); + + return x; +} + diff --git a/Extras/simdmathlibrary/spu/atan2f4.c b/Extras/simdmathlibrary/spu/atan2f4.c new file mode 100644 index 
000000000..da0eef280 --- /dev/null +++ b/Extras/simdmathlibrary/spu/atan2f4.c @@ -0,0 +1,60 @@ +/* atan2f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + + +// +// Inverse tangent function of two variables +// +vector float +atan2f4 (vector float y, vector float x) +{ + vec_float4 res = atanf4(divf4(y,x)); + + // Use the arguments to determine the quadrant of the result: + // if (x < 0) + // if (y < 0) + // res = -PI + res + // else + // res = PI + res + // + vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y); + vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x); + + vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg); + + vec_float4 newRes = spu_add(bias, res); + + res = spu_sel(res,newRes,xNeg); + + return res; +} diff --git a/Extras/simdmathlibrary/spu/atanf4.c b/Extras/simdmathlibrary/spu/atanf4.c new file mode 100644 index 000000000..53bf3b415 --- /dev/null +++ b/Extras/simdmathlibrary/spu/atanf4.c @@ -0,0 +1,76 @@ +/* atanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +// +// Computes the inverse tangent of all four slots of x. +// +vector float +atanf4 (vector float x) +{ + vec_float4 bias; + vec_float4 x2, x3, x4, x8, x9; + vec_float4 hi, lo; + vec_float4 result; + vec_float4 inv_x; + vec_uint4 sign; + vec_uint4 select; + + sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31); + inv_x = recipf4(x); + inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u)); + + select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f)); + bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f))); + bias = (vec_float4)spu_and((vec_uint4)bias, select); + + x = spu_sel(x, inv_x, select); + + bias = spu_add(bias, x); + x2 = spu_mul(x, x); + x3 = spu_mul(x2, x); + x4 = spu_mul(x2, x2); + x8 = spu_mul(x4, x4); + x9 = spu_mul(x8, x); + hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f)); + hi = spu_madd(hi, x2, spu_splats(0.0429096138f)); + hi = spu_madd(hi, x2, spu_splats(-0.0752896400f)); + hi = spu_madd(hi, x2, spu_splats(0.1065626393f)); + lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f)); + lo = spu_madd(lo, x2, spu_splats(-0.3333314528f)); + lo = spu_madd(lo, x3, bias); + + result = spu_madd(hi, x9, lo); + + return result; +} + diff --git a/Extras/simdmathlibrary/spu/cbrtf4.c b/Extras/simdmathlibrary/spu/cbrtf4.c new file mode 100644 index 000000000..95d18a968 --- /dev/null +++ b/Extras/simdmathlibrary/spu/cbrtf4.c @@ -0,0 
+1,105 @@ +/* cbrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + + +#define __calcQuot(xexp) n = xexp; \ + vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \ + n = spu_sel(n, spu_add(n,2), negxexpmask); \ + \ + quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \ + quot = spu_add(quot, spu_rlmaska(quot, -4)); \ + quot = spu_add(quot, spu_rlmaska(quot, -8)); \ + quot = spu_add(quot, spu_rlmaska(quot,-16)); \ + vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \ + quot = spu_add( \ + quot, \ + spu_rlmaska( \ + spu_add( \ + spu_add(r,5), \ + spu_sl (r,2) \ + ), \ + -4 \ + ) \ + ); \ + +#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3) +#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3) + +vector float +cbrtf4 (vector float x) +{ + vec_float4 zeros = spu_splats(0.0f); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros); + vec_int4 xexp, n; + vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF); + vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); + x = spu_and(x, sgnmask); + + x = frexpf4(x, &xexp); + vec_float4 p = spu_madd( + spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)), + x, + spu_splats(0.492659620528969547f) + ); + vec_float4 p3 = spu_mul(p, spu_mul(p, p)); + vec_int4 quot; + __calcQuot(xexp); + vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient + vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2)); + factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1)); + factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0)); + factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1)); + factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2)); + + vec_float4 pre = spu_mul(p, factor); + vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3); + vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x ); + vec_float4 res = spu_mul(pre, divf4(numr, denr)); + 
res = ldexpf4(res, quot); + + return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask), + zeros, + zeromask); +} + +/* +_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x)) +{ + vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); + vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); + vec_float4 absx = spu_andc(x, sbit); + vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx))); + res = spu_sel(res, spu_or(sbit, res), neg); + return res; +} +*/ diff --git a/Extras/simdmathlibrary/spu/ceild2.c b/Extras/simdmathlibrary/spu/ceild2.c new file mode 100644 index 000000000..8bbad0ea7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/ceild2.c @@ -0,0 +1,94 @@ +/* ceild2 - for each of two doule slots, round up to smallest integer not less than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +ceild2(vector double in) +{ + vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}); + vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_uint4 one = ((vec_uint4){0, 1, 0, 1}); + vec_int4 exp, shift; + vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0; + vec_ullong2 sign = spu_splats(0x8000000000000000ULL); + vec_double2 in_hi, out; + vec_double2 one_d = spu_splats((double)1.0); + vec_uint4 zero = spu_splats((unsigned int)0x0); + + /* This function generates the following component + * based upon the inputs. + * + * mask = bits of the input that need to be replaced. + * insert = value of the bits that need to be replaced + * addend = value to be added to perform function. + * + * These are applied as follows:. + * + * out = ((in & mask) | insert) + addend + */ + + in_hi = spu_shuffle(in, in, splat_hi); + exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF); + shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp); + + /* clamp shift to the range 0 to -31. + */ + shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32)); + frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift); + exp_ge0 = spu_cmpgt(exp, 0x3FE); + mask = spu_orc(frac_mask, exp_ge0); + + /* addend = ((in & mask) && (in >= 0)) ? 
mask+1 : 0 + */ + mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4)); + pos = spu_cmpgt((vec_int4)in_hi, -1); + //pos = spu_cmpgt((vec_int4)in_hi, 0x0); //it is also work + equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0); + addend = spu_andc(spu_and(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words))); + + /* insert + */ + e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero); + e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words)); + // e_sign = spu_sel(spu_splats((unsigned int)0x0), (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero)); + e_sign = spu_and( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero)); + insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0); + + /* replace insert + */ + in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign)); + + /* in + addend + */ + out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4)); + + return (out); +} diff --git a/Extras/simdmathlibrary/spu/ceilf4.c b/Extras/simdmathlibrary/spu/ceilf4.c new file mode 100644 index 000000000..90bc1817d --- /dev/null +++ b/Extras/simdmathlibrary/spu/ceilf4.c @@ -0,0 +1,54 @@ +/* ceilf4 - for each of four float slots, round up to smallest integer not less than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +ceilf4 (vector float x) +{ + vec_int4 xi, xi1; + vec_uint4 inrange; + vec_float4 truncated, truncated1; + + // Find truncated value and one greater. + + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + xi1 = spu_add( xi, 1 ); + + truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange ); + truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange ); + + // If truncated value is less than input, add one. + + return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) ); +} + diff --git a/Extras/simdmathlibrary/spu/copysignd2.c b/Extras/simdmathlibrary/spu/copysignd2.c new file mode 100644 index 000000000..335271ff2 --- /dev/null +++ b/Extras/simdmathlibrary/spu/copysignd2.c @@ -0,0 +1,39 @@ +/* copysignd2 - for each of two double slots, return value with magnitude from x and sign from y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + + +vector double copysignd2 (vector double x, vector double y) +{ + return spu_sel( x, y, spu_splats(0x8000000000000000ull) ); +} + diff --git a/Extras/simdmathlibrary/spu/copysignf4.c b/Extras/simdmathlibrary/spu/copysignf4.c new file mode 100644 index 000000000..d58f6c1b5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/copysignf4.c @@ -0,0 +1,39 @@ +/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y. 
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float +copysignf4 (vector float x, vector float y) +{ + return spu_sel( x, y, spu_splats(0x80000000) ); +} + diff --git a/Extras/simdmathlibrary/spu/cosd2.c b/Extras/simdmathlibrary/spu/cosd2.c new file mode 100644 index 000000000..8d1d964f6 --- /dev/null +++ b/Extras/simdmathlibrary/spu/cosd2.c @@ -0,0 +1,127 @@ +/* cosd2 - Computes the cosine of each of two double slots. 
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "sincos_c.h" + +vector double +cosd2 (vector double x) +{ + vec_double2 xl,xl2,xl3,res; + vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull); + vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull); + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(x))*sign(x)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull))); + vec_float4 xf = spu_roundtf(xl); + vec_int4 q = spu_convts(xf,0); + q = spu_shuffle(q,q,copyEven); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3))); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_double2 qd = spu_extend(qf); + vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x); + xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1); + + // Check if |xl| is a really small number + // + vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull)); + vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl); + + // Compute x^2 and x^3 + // + xl2 = spu_mul(xl,xl); + xl3 = spu_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and + // sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5) + // + + vec_double2 ct0 = spu_mul(xl2,xl2); + vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D)); + vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D)); + vec_double2 ct3 = 
spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D)); + vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D)); + vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D)); + vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D)); + vec_double2 ct4 = spu_madd(ct2,ct0,ct3); + vec_double2 st4 = spu_madd(st2,ct0,st3); + vec_double2 ct5 = spu_mul(ct0,ct0); + + vec_double2 ct6 = spu_madd(ct5,ct1,ct4); + vec_double2 st6 = spu_madd(ct5,st1,st4); + + vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0)); + vec_double2 sx = spu_madd(st6,xl3,xl); + + // Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0 + // + sx = spu_sel(sx,xl,isTiny); + cx = spu_sel(cx,spu_splats(1.0),isTiny); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); + res = spu_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); + mask2 = spu_shuffle(mask2,mask2,copyEven); + res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2); + // if input = +/-Inf return NAN + // + res = spu_sel(res, nan, isnand2 (x)); + + // if input = 0 or denorm return or 1.0 + // + vec_ullong2 zeroMask = is0denormd2 (x); + res = spu_sel(res,spu_splats(1.0),zeroMask); + return res; +} + + diff --git a/Extras/simdmathlibrary/spu/cosf4.c b/Extras/simdmathlibrary/spu/cosf4.c new file mode 100644 index 000000000..64a6594d7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/cosf4.c @@ -0,0 +1,94 @@ +/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include "sincos_c.h" + +vector float +cosf4 (vector float x) +{ + vec_float4 xl,xl2,xl3,res; + vec_int4 q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000))); + q = spu_convts(xl,0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3))); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x); + xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = spu_mul(xl,xl); + xl3 = spu_mul(xl2,xl); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1)); + vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1)); + + vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2)); + vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2)); + + vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f)); + vec_float4 sx = spu_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); + res = spu_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); + res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2); + + return res; + +} diff --git a/Extras/simdmathlibrary/spu/divd2.c 
b/Extras/simdmathlibrary/spu/divd2.c new file mode 100644 index 000000000..f52da5d51 --- /dev/null +++ b/Extras/simdmathlibrary/spu/divd2.c @@ -0,0 +1,41 @@ +/* divd2 - for each of two double slots, divide numer by denom. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +// Equal to numer * recipd2(denom) +// See recipd2 for results of special values. 
+ +#include +#include + +vector double +divd2 (vector double numer, vector double denom) +{ + return spu_mul( numer, recipd2( denom ) ); +} + diff --git a/Extras/simdmathlibrary/spu/divf4.c b/Extras/simdmathlibrary/spu/divf4.c new file mode 100644 index 000000000..670b03f80 --- /dev/null +++ b/Extras/simdmathlibrary/spu/divf4.c @@ -0,0 +1,46 @@ +/* divf4 - for each of four float slots, divide numer by denom. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector float +divf4 (vector float numer, vector float denom) +{ + // Reciprocal estimate and 1 Newton-Raphson iteration. + // Uses constant of 1.0 + 1 ulp to improve accuracy. + + vector float y0, y0numer; + vector float oneish = (vector float)spu_splats(0x3f800001); + + y0 = spu_re( denom ); + y0numer = spu_mul( numer, y0 ); + return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer ); +} + diff --git a/Extras/simdmathlibrary/spu/divi4.c b/Extras/simdmathlibrary/spu/divi4.c new file mode 100644 index 000000000..aa03d585b --- /dev/null +++ b/Extras/simdmathlibrary/spu/divi4.c @@ -0,0 +1,109 @@ +/* divi4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom +// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator. + +divi4_t divi4 (vector signed int numer, vector signed int denom) +{ + divi4_t res; + vec_int4 quot, newQuot, shift; + vec_uint4 numerPos, denomPos, quotNeg; + vec_uint4 numerAbs, denomAbs; + vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted; + vec_uint4 newNum, skip, cont; + int anyCont; + + // Determine whether result needs sign change + + numerPos = spu_cmpgt( numer, -1 ); + denomPos = spu_cmpgt( denom, -1 ); + quotNeg = spu_xor( numerPos, denomPos ); + + // Use absolute values of numerator, denominator + + numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos ); + denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos ); + + // Get difference of leading zeros. + // Any possible negative value will be interpreted as a shift > 31 + + denomZeros = spu_cntlz( denomAbs ); + numerZeros = spu_cntlz( numerAbs ); + + shift = (vec_int4)spu_sub( denomZeros, numerZeros ); + + // Shift denom to align leading one with numerator's + + denomShifted = spu_sl( denomAbs, (vec_uint4)shift ); + oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift ); + oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) ); + + // Shift left all leading zeros. 
+ + denomLeft = spu_sl( denomAbs, denomZeros ); + oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros ); + + quot = spu_splats(0); + + do + { + cont = spu_cmpgt( oneShifted, 0U ); + anyCont = spu_extract( spu_gather( cont ), 0 ); + + newQuot = spu_or( quot, (vec_int4)oneShifted ); + + // Subtract shifted denominator from remaining numerator + // when denominator is not greater. + + skip = spu_cmpgt( denomShifted, numerAbs ); + newNum = spu_sub( numerAbs, denomShifted ); + + // If denominator is greater, next shift is one more, otherwise + // next shift is number of leading zeros of remaining numerator. + + numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip ); + shift = (vec_int4)spu_sub( skip, numerZeros ); + + oneShifted = spu_rlmask( oneLeft, shift ); + denomShifted = spu_rlmask( denomLeft, shift ); + + quot = spu_sel( newQuot, quot, skip ); + numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) ); + } + while ( anyCont ); + + res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg ); + res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos ); + return res; +} + diff --git a/Extras/simdmathlibrary/spu/divu4.c b/Extras/simdmathlibrary/spu/divu4.c new file mode 100644 index 000000000..f6d5342b4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/divu4.c @@ -0,0 +1,97 @@ +/* divu4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom +// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator. + +divu4_t divu4 (vector unsigned int numer, vector unsigned int denom) +{ + divu4_t res; + vec_int4 shift; + vec_uint4 quot, newQuot; + vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted; + vec_uint4 newNum, skip, cont; + int anyCont; + + // Get difference of leading zeros. + // Any possible negative value will be interpreted as a shift > 31 + + denomZeros = spu_cntlz( denom ); + numerZeros = spu_cntlz( numer ); + + shift = (vec_int4)spu_sub( denomZeros, numerZeros ); + + // Shift denom to align leading one with numerator's + + denomShifted = spu_sl( denom, (vec_uint4)shift ); + oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift ); + oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) ); + + // Shift left all leading zeros. 
+ + denomLeft = spu_sl( denom, denomZeros ); + oneLeft = spu_sl( spu_splats(1U), denomZeros ); + + quot = spu_splats(0U); + + do + { + cont = spu_cmpgt( oneShifted, 0U ); + anyCont = spu_extract( spu_gather( cont ), 0 ); + + newQuot = spu_or( quot, oneShifted ); + + // Subtract shifted denominator from remaining numerator + // when denominator is not greater. + + skip = spu_cmpgt( denomShifted, numer ); + newNum = spu_sub( numer, denomShifted ); + + // If denominator is greater, next shift is one more, otherwise + // next shift is number of leading zeros of remaining numerator. + + numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip ); + shift = (vec_int4)spu_sub( skip, numerZeros ); + + oneShifted = spu_rlmask( oneLeft, shift ); + denomShifted = spu_rlmask( denomLeft, shift ); + + quot = spu_sel( newQuot, quot, skip ); + numer = spu_sel( newNum, numer, spu_orc(skip,cont) ); + } + while ( anyCont ); + + res.quot = quot; + res.rem = numer; + return res; +} + diff --git a/Extras/simdmathlibrary/spu/exp2f4.c b/Extras/simdmathlibrary/spu/exp2f4.c new file mode 100644 index 000000000..88354bfd1 --- /dev/null +++ b/Extras/simdmathlibrary/spu/exp2f4.c @@ -0,0 +1,131 @@ +/* exp2f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + + +/* + * FUNCTION + * vec_float4 _exp2_v(vec_float4 x) + * + * DESCRIPTION + * _exp2_v computes 2 raised to the input vector x. Computation is + * performed by observing the 2^(a+b) = 2^a * 2^b. + * We decompose x into a and b (above) by letting. + * a = ceil(x), b = x - a; + * + * 2^a is easilty computed by placing a into the exponent + * or a floating point number whose mantissa is all zeros. + * + * 2^b is computed using the following polynomial approximation. + * (C. Hastings, Jr, 1955). + * + * __7__ + * \ + * \ + * 2^(-x) = / Ci*x^i + * /____ + * i=1 + * + * for x in the range 0.0 to 1.0 + * + * C0 = 1.0 + * C1 = -0.9999999995 + * C2 = 0.4999999206 + * C3 = -0.1666653019 + * C4 = 0.0416573475 + * C5 = -0.0083013598 + * C6 = 0.0013298820 + * C7 = -0.0001413161 + * + * This function does not handle out of range conditions. It + * assumes that x is in the range (-128.0, 127.0]. Values outside + * this range will produce undefined results. 
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */

/* exp2f4 - for each of four float slots, compute 2 raised to x.
   Uses 2^(a+b) = 2^a * 2^b with a = ceil(x) built directly into a float
   exponent field and 2^b approximated by a degree-7 polynomial (Hastings)
   in b*ln(2).  Assumes x roughly in (-126, 128]; out-of-range slots are
   clamped by the overflow/underflow selects at the end. */
vector float
exp2f4 (vector float x)
{
  vec_int4 ix;
  vec_uint4 overflow, underflow;
  vec_float4 frac, frac2, frac4;
  vec_float4 exp_int, exp_frac;
  vec_float4 result;
  vec_float4 hi, lo;

  vec_float4 bias;
  /* Break the input x into two parts: ceil(x) and x - ceil(x).
     bias is ~0.99999988f (0x3F7FFFFF) for x >= 0 and 0 for x < 0, so the
     truncating convert below rounds positive x up (negative x truncates
     toward zero, which already is the ceiling for negatives). */
  bias = (vec_float4)(spu_rlmaska((vec_int4)(x), -31));
  bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
  ix = spu_convts(spu_add(x, bias), 0);
  /* frac = (ceil(x) - x) * ln(2), in [0, ln 2); the polynomial below
     evaluates e^(-frac) = 2^-(ceil(x) - x). */
  frac = spu_sub(spu_convtf(ix, 0), x);
  frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));

  // !!! HRD Replaced earlier overflow handling.
  overflow = spu_cmpgt(x, (vec_float4)spu_splats(0x4300FFFFu)); // !!! Biggest possible exponent to fit in range.
  underflow = spu_cmpgt(spu_splats(-126.0f), x);

  /* Build 2^ix: bias by 126 (not 127) with saturating add, place in the
     exponent field, then double via float add — avoids wrapping when
     x is close to 128 (HRD's correction of the original +127 form). */
  exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
  exp_int = spu_add(exp_int, exp_int);                  // !!! HRD

  /* Instruction counts can be reduced if the polynomial was computed
   * entirely from nested (dependent) fma's.  However, to reduce the
   * number of pipeline stalls, the polynomial is evaluated in two
   * halves (hi and lo). */
  frac2 = spu_mul(frac, frac);
  frac4 = spu_mul(frac2, frac2);

  hi = spu_madd(frac, spu_splats(-0.0001413161f), spu_splats(0.0013298820f));
  hi = spu_madd(frac, hi, spu_splats(-0.0083013598f));
  hi = spu_madd(frac, hi, spu_splats(0.0416573475f));
  lo = spu_madd(frac, spu_splats(-0.1666653019f), spu_splats(0.4999999206f));
  lo = spu_madd(frac, lo, spu_splats(-0.9999999995f));
  lo = spu_madd(frac, lo, spu_splats(1.0f));

  exp_frac = spu_madd(frac4, hi, lo);
  /* NOTE(review): this updated ix is never read again below — looks like a
     leftover from the earlier range-check path; confirm before removing. */
  ix = spu_add(ix, spu_rlmask((vec_int4)(exp_frac), -23));
  result = spu_mul(exp_frac, exp_int);

  /* Handle overflow (clamp to 0x7FFFFFFF bit pattern) and underflow (0). */
  result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
  result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);

  return (result);
}
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +#define _EXPF_H_C1 ((float)-0.6931470632553101f) +#define _EXPF_H_C2 ((float)-1.1730463525082e-7f) + +#define _EXPF_H_INVLN2 ((float)1.4426950408889634f) + +vector float +expf4 (vector float x) +{ + vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); + vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask); + vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2)); + vec_int4 xexp = spu_convts(spu_add(g, goffset),0); + + g = spu_convtf(xexp, 0); + g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x)); + vec_float4 z = spu_mul(g, g); + vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f)); + vec_float4 b = spu_mul(g, + spu_madd(z, + spu_splats((float)0.0083208258f), + spu_splats((float)0.4999999992f) + ) + ); + + vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)), + spu_add(spu_splats(1.0f), spu_sub(a, b))); + + return ldexpf4(foo, xexp); + +} diff --git a/Extras/simdmathlibrary/spu/expm1f4.c b/Extras/simdmathlibrary/spu/expm1f4.c new file mode 100644 index 000000000..b2dde1419 --- /dev/null +++ b/Extras/simdmathlibrary/spu/expm1f4.c @@ -0,0 +1,54 @@ +/* expm1f4 - + Copyright (C) 
2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f) +#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f) + +vector float +expm1f4 (vector float x) +{ + vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)), + spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x)); + vec_float4 x2 = spu_mul(x,x); + vec_float4 d0, d1, n0, n1; + + d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f)); + d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0); + + n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f)); + n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f)); + n1 = spu_madd(x2, n1, n0); + + return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)), + spu_mul(x, divf4(n1, d1)), + nearzeromask); +} diff --git a/Extras/simdmathlibrary/spu/fabsd2.c b/Extras/simdmathlibrary/spu/fabsd2.c new file mode 100644 index 000000000..26b155f59 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fabsd2.c @@ -0,0 +1,37 @@ +/* fabsd2 - for each of two double slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector double fabsd2 (vector double x) +{ + return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); +} diff --git a/Extras/simdmathlibrary/spu/fabsf4.c b/Extras/simdmathlibrary/spu/fabsf4.c new file mode 100644 index 000000000..4086b12d8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fabsf4.c @@ -0,0 +1,37 @@ +/* fabsf4 - for each of 4 float slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float fabsf4 (vector float x) +{ + return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) ); +} + diff --git a/Extras/simdmathlibrary/spu/fdimd2.c b/Extras/simdmathlibrary/spu/fdimd2.c new file mode 100644 index 000000000..12e7a3484 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fdimd2.c @@ -0,0 +1,46 @@ +/* fdimd2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +/* fdim_v - compute the positive difference of x and y. + */ +vector double +fdimd2 (vector double x, vector double y) +{ + vec_double2 v; + vec_uint4 mask; + + v = spu_sub(x, y); + mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31)); + + return (v); +} diff --git a/Extras/simdmathlibrary/spu/fdimf4.c b/Extras/simdmathlibrary/spu/fdimf4.c new file mode 100644 index 000000000..71446bd05 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fdimf4.c @@ -0,0 +1,38 @@ +/* fdimf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +fdimf4 (vector float x, vector float y) +{ + vec_float4 diff = spu_sub(x,y); + return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y)); +} diff --git a/Extras/simdmathlibrary/spu/floord2.c b/Extras/simdmathlibrary/spu/floord2.c new file mode 100644 index 000000000..648a84dd3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/floord2.c @@ -0,0 +1,94 @@ +/* floord2 - for each of two doule slots, round up to smallest integer not more than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
#include <simdmath.h>        /* headers reconstructed: names lost in extraction */
#include <spu_intrinsics.h>

/* floord2 - for each of two double slots, round down to the largest
   integer not greater than the value.  (The original header comment said
   "round up to smallest integer not more than" — that describes floor
   incorrectly; floor rounds toward negative infinity.)
   Works entirely on the bit representation since the SPU has no native
   double-precision round-to-floor. */
vector double
floord2(vector double in)
{
  /* Shuffle patterns: swap the two 32-bit words of each double, and
     splat each double's high word across its slot. */
  vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
  vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
  vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
  vec_int4 exp, shift;
  vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
  vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
  vec_double2 in_hi, out;
  vec_double2 one_d = spu_splats((double)1.0);
  vec_uint4 zero = spu_splats((unsigned int)0x0);

  /* This function generates the following components based upon the inputs.
   *
   *   mask   = bits of the input that need to be replaced.
   *   insert = value of the bits that need to be replaced.
   *   addend = value to be added to perform the function.
   *
   * These are applied as follows:
   *
   *   out = ((in & mask) | insert) + addend
   */

  in_hi = spu_shuffle(in, in, splat_hi);
  /* Extract the 11-bit biased exponent from the high word. */
  exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
  /* Per-word shift: 1023/1043 are the biases for the high (20 fraction
     bits) and low (32 fraction bits) words of each double. */
  shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);

  /* Clamp shift to the range 0 to -31. */
  shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
  /* Mask of fraction bits below the integer part. */
  frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
  /* exp >= 0x3FF means |in| >= 1.0 (unbiased exponent >= 0). */
  exp_ge0 = spu_cmpgt(exp, 0x3FE);
  mask = spu_orc(frac_mask, exp_ge0);

  /* addend = ((in & mask) && (in < 0)) ? mask+1 : 0
     128-bit add of `one` to `mask` using generate/extend carry. */
  mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
  pos = spu_cmpgt((vec_int4)in_hi, -1);
  equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
  /* Only negative inputs with a nonzero fraction get the addend. */
  addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));

  /* insert: for |in| < 1, the result is 0.0 (in >= 0) or the bits of
     -0.0/-1.0 handling below (in < 0); e_00 flags exact +/-0 inputs. */
  e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
  e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
  /* e_sign = bits of 1.0 where the input is negative, else 0. */
  e_sign = spu_andc( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
  insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);

  /* Replace the masked bits (sign bit is always preserved). */
  in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));

  /* in + addend, again as a 128-bit carry-propagating add. */
  out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));

  return (out);
}
float slots, round down to largest integer not greater than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +floorf4 (vector float x) +{ + vec_int4 xi, xi1; + vec_uint4 inrange; + vec_float4 truncated, truncated1; + + // Find truncated value and one less. 
+ + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + xi1 = spu_add( xi, -1 ); + + truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange ); + truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange ); + + // If truncated value is greater than input, subtract one. + + return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) ); +} + diff --git a/Extras/simdmathlibrary/spu/fmad2.c b/Extras/simdmathlibrary/spu/fmad2.c new file mode 100644 index 000000000..32e7e6c18 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmad2.c @@ -0,0 +1,37 @@ +/* fmad2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +fmad2 (vector double x, vector double y, vector double z) +{ + return spu_madd(x,y,z); +} diff --git a/Extras/simdmathlibrary/spu/fmaf4.c b/Extras/simdmathlibrary/spu/fmaf4.c new file mode 100644 index 000000000..0c54d1e10 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmaf4.c @@ -0,0 +1,38 @@ +/* fmaf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +vector float +fmaf4 (vector float x, vector float y, vector float z) +{ + return spu_madd(x,y,z); +} diff --git a/Extras/simdmathlibrary/spu/fmaxd2.c b/Extras/simdmathlibrary/spu/fmaxd2.c new file mode 100644 index 000000000..0ffd4628d --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmaxd2.c @@ -0,0 +1,68 @@ +/* fmaxd2 - for each of two double slots, compute maximum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +/* Return the maximum numeric value of their arguments. If one argument + * is a NaN, fmax returns the other value. If both are NaNs, then a NaN + * is returned. + */ + +vector double +fmaxd2 (vector double x, vector double y) +{ + vec_ullong2 selector, denorm; + vec_double2 x_offset, y_offset, diff; + vec_uint4 nan_x, abs_x, gt, eq; + vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0}; + vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0}; + vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0}; + + /* If both x and y are denorm or zero, then set 0x3ff to exponent + */ + denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0); + x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm); + y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm); + + /* If x is a NaN, then select y as max + */ + abs_x = spu_andc((vec_uint4)x, sign); + gt = spu_cmpgt(abs_x, infinity); + eq = spu_cmpeq(abs_x, infinity); + nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); + + diff = spu_sub(x_offset, y_offset); + selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1)); + selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11})); + + return spu_sel(x, y, selector); +} + diff --git a/Extras/simdmathlibrary/spu/fmaxf4.c b/Extras/simdmathlibrary/spu/fmaxf4.c new file mode 100644 index 000000000..1d785125c --- /dev/null +++ 
b/Extras/simdmathlibrary/spu/fmaxf4.c @@ -0,0 +1,40 @@ +/* fmaxf4 - for each of four float slots, compute maximum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + + +vector float +fmaxf4 (vector float x, vector float y) +{ + return spu_sel( x, y, spu_cmpgt( y, x ) ); +} + diff --git a/Extras/simdmathlibrary/spu/fmind2.c b/Extras/simdmathlibrary/spu/fmind2.c new file mode 100644 index 000000000..47b0a060c --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmind2.c @@ -0,0 +1,67 @@ +/* fmind2 - for each of two double slots, compute minimum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +/* Return the minimum numeric value of their arguments. If one argument + * is a NaN, fmin returns the other value. If both are NaNs, then a NaN + * is returned. + */ + +vector double +fmind2 (vector double x, vector double y) +{ + vec_ullong2 selector, denorm; + vec_double2 x_offset, y_offset, diff; + vec_uint4 nan_x, abs_x, gt, eq; + vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0}; + vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0}; + vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0}; + + /* If both x and y are denorm or zero, then set 0x3ff to exponent + */ + denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0); + x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm); + y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm); + + /* If x is a NaN, then select y as min + */ + abs_x = spu_andc((vec_uint4)x, sign); + gt = spu_cmpgt(abs_x, infinity); + eq = spu_cmpeq(abs_x, infinity); + nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); + + diff = spu_sub(y_offset, x_offset); + selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1)); + selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11})); + + return spu_sel(x, y, selector); +} + diff --git a/Extras/simdmathlibrary/spu/fminf4.c b/Extras/simdmathlibrary/spu/fminf4.c new file mode 100644 index 000000000..7963b7f85 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fminf4.c @@ -0,0 +1,40 @@ +/* fminf4 - for each of four float slots, compute minimum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + + +vector float +fminf4 (vector float x, vector float y) +{ + return spu_sel( x, y, spu_cmpgt( x, y ) ); +} + diff --git a/Extras/simdmathlibrary/spu/fmodd2.c b/Extras/simdmathlibrary/spu/fmodd2.c new file mode 100644 index 000000000..40f7e8125 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmodd2.c @@ -0,0 +1,302 @@ +/* fmodd2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + +/* + * a vector is returned that contains the remainder of xi/yi, + * for coresponding elements of vector double x and vector double y, + * as described below: + * if yi is 0, the result is 0 + * otherwise, the funciton determines the unique signed integer value i + * such that the returned element is xi - i * yi with the same sign as xi and + * magnitude less than |yi| + */ + +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb); + +vector double fmodd2(vector double x, vector double y) +{ + int shift0, shift1; + vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}; + vec_uchar16 propagate = (vec_uchar16){4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192}; + vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}; + vec_uchar16 merge = (vec_uchar16){8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31}; + vec_int4 n, shift, power; + vec_uint4 z; + vec_uint4 x_hi, y_hi; + vec_uint4 abs_x, abs_y; + vec_uint4 exp_x, exp_y; + vec_uint4 zero_x, zero_y; + vec_uint4 mant_x, mant_x0, mant_x1, mant_y ; + vec_uint4 norm, denorm, norm0, norm1, denorm0, denorm1; + vec_uint4 result, result0, resultx, cnt, sign, borrow, mask; + vec_uint4 x_7ff, x_inf, x_nan, y_7ff, y_inf, y_nan, is_normal; + vec_uint4 x_is_norm, y_is_norm, frac_x, frac_y, cnt_x, cnt_y, mant_x_norm, mant_y_norm; + vec_uint4 mant_x_denorm0, mant_x_denorm1, mant_x_denorm; + vec_uint4 mant_y_denorm0, mant_y_denorm1, mant_y_denorm; + vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL)); + vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL)); + vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)); + + sign = spu_and((vec_uint4)x, sign_mask); + + abs_x = spu_andc((vec_uint4)x, 
sign_mask); + abs_y = spu_andc((vec_uint4)y, sign_mask); + + x_hi = spu_shuffle(abs_x, abs_x, splat_hi); + y_hi = spu_shuffle(abs_y, abs_y, splat_hi); + + exp_x = spu_rlmask(x_hi, -20); + exp_y = spu_rlmask(y_hi, -20); + + // y>x + resultx = _vec_gt64(abs_y, abs_x); + + //is Inf, is Nan + x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff)); + x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0})); + x_nan = spu_andc(x_7ff, x_inf); + + y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff)); + y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0})); + y_nan = spu_andc(y_7ff, y_inf); + + // is zero + zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0)); + zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0)); + + + /* Determine ilogb of abs_x and abs_y and + * extract the mantissas (mant_x, mant_y) + */ + /* change form*/ + // 0 -> ! is_normal + // 0 don't care (because (x=0, y!=0)match xx || y<=x + result = spu_sel(spu_andc(result, spu_rlmask(result0, -1)), + (vec_uint4)x, resultx); + //y=+-inf => 0 + result = spu_sel(result, (vec_uint4)x, y_inf); + //x=+-inf => NaN + result = spu_sel(result, ((vec_uint4){0x7ff80000, 0x0, 0x7ff80000, 0x0}), x_inf); + //y=0 => 0 + result = spu_andc(result, zero_y); + + //x=NaN or y=NaN => 0 + result = spu_sel(result, (vec_uint4)x, x_nan); + result = spu_sel(result, (vec_uint4)y, y_nan); + + return ((vec_double2)result); +} + + +/* + * extend spu_cmpgt function to 64bit data + */ +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb + vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb + return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right +} +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right + return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); +} + +static 
inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 eq = spu_cmpeq(aa, bb); + return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}))); +} + diff --git a/Extras/simdmathlibrary/spu/fmodf4.c b/Extras/simdmathlibrary/spu/fmodf4.c new file mode 100644 index 000000000..7deae38fb --- /dev/null +++ b/Extras/simdmathlibrary/spu/fmodf4.c @@ -0,0 +1,86 @@ +/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// +// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. +// If x == 0, the result is 0. +// If x != 0 and y == 0, the result is undefined. + +vector float +fmodf4 (vector float x, vector float y) +{ + vec_float4 q, xabs, yabs, qabs, xabs2; + vec_int4 qi0, qi1, qi2; + vec_float4 i0, i1, i2, r1, r2, i; + vec_uint4 inrange; + + // Find i = truncated_integer(|x/y|) + + // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0. + // Thus i is either the truncated quotient, one less, or one greater. + + q = divf4( x, y ); + xabs = fabsf4( x ); + yabs = fabsf4( y ); + qabs = fabsf4( q ); + xabs2 = spu_add( xabs, xabs ); + + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); + inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); + + qi1 = spu_convts( qabs, 0 ); + qi0 = spu_add( qi1, -1 ); + qi2 = spu_add( qi1, 1 ); + + i0 = spu_convtf( qi0, 0 ); + i1 = spu_convtf( qi1, 0 ); + i2 = spu_convtf( qi2, 0 ); + + // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as + // 2*|x| - i*|y| >= |x|: + // + // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. + // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed + // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. 
+ + r1 = spu_nmsub( i1, yabs, xabs2 ); + r2 = spu_nmsub( i2, yabs, xabs2 ); + + i = i0; + i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) ); + i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) ); + + i = copysignf4( i, q ); + + return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); +} + diff --git a/Extras/simdmathlibrary/spu/fpclassifyd2.c b/Extras/simdmathlibrary/spu/fpclassifyd2.c new file mode 100644 index 000000000..99242a8e5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/fpclassifyd2.c @@ -0,0 +1,94 @@ +/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#ifndef FP_NAN +#define FP_NAN (0) +#endif +#ifndef FP_INFINITE +#define FP_INFINITE (1) +#endif +#ifndef FP_ZERO +#define FP_ZERO (2) +#endif +#ifndef FP_SUBNORMAL +#define FP_SUBNORMAL (3) +#endif +#ifndef FP_NORMAL +#define FP_NORMAL (4) +#endif + +vector signed long long +fpclassifyd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 mask; + vec_llong2 classtype; + vec_uint4 cmpgt, cmpeq; + + //FP_NORMAL: normal unless nan, infinity, zero, or denorm + classtype = spu_splats((long long)FP_NORMAL); + + //FP_NAN: all-ones exponent and non-zero mantissa + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); + mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + classtype = spu_sel( 
classtype, spu_splats((long long)FP_NAN), mask ); + + //FP_INFINITE: all-ones exponent and zero mantissa + mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask ); + + //FP_ZERO: zero exponent and zero mantissa + cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask ); + + //FP_SUBNORMAL: zero exponent and non-zero mantissa + cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero ); + cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero ); + mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask ); + + return classtype; +} diff --git a/Extras/simdmathlibrary/spu/fpclassifyf4.c b/Extras/simdmathlibrary/spu/fpclassifyf4.c new file mode 100644 index 000000000..0fdc8d08d --- /dev/null +++ b/Extras/simdmathlibrary/spu/fpclassifyf4.c @@ -0,0 +1,78 @@ +/* fpclassifyf4 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#ifndef FP_NAN +#define FP_NAN (0) +#endif +#ifndef FP_INFINITE +#define FP_INFINITE (1) +#endif +#ifndef FP_ZERO +#define FP_ZERO (2) +#endif +#ifndef FP_SUBNORMAL +#define FP_SUBNORMAL (3) +#endif +#ifndef FP_NORMAL +#define FP_NORMAL (4) +#endif + +vector signed int +fpclassifyf4 (vector float x) +{ + vec_uint4 zero = spu_splats((unsigned int)0x00000000); + + vec_uint4 mask; + vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff); + vec_int4 classtype = (vec_int4)zero; + + //FP_NAN: NaN not supported on SPU, never return FP_NAN + + //FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE + + //FP_ZERO: zero exponent and zero mantissa + mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero ); + classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask ); + unclassified = spu_andc( unclassified, mask ); + + //FP_SUBNORMAL: zero exponent and non-zero mantissa + mask = spu_and( spu_cmpeq( spu_and( 
(vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ), + spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) ); + classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask ); + unclassified = spu_andc( unclassified, mask ); + + //FP_NORMAL: none of the above + classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified ); + + return classtype; +} diff --git a/Extras/simdmathlibrary/spu/frexpd2.c b/Extras/simdmathlibrary/spu/frexpd2.c new file mode 100644 index 000000000..3643c54b5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/frexpd2.c @@ -0,0 +1,95 @@ +/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#ifndef DBL_NAN +#define DBL_NAN ((long long)0x7FF8000000000000ull) +#endif + +vector double +frexpd2 (vector double x, vector signed long long *pexp) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull}; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, isinf, iszero; + vec_ullong2 e0, x0, x1; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, sh, ex; + vec_double2 fr, frac = (vec_double2)zero; + + //NAN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) ); + isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even ); + frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan ); + + //INF: x is infinite (all-ones exponent and zero mantissa) + isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + frac = spu_sel( frac, x , isinf ); + + //x 
is zero (zero exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); + + frac = spu_sel( frac, (vec_double2)zero , iszero ); + *pexp = spu_sel( *pexp, (vec_llong2)zero , iszero ); + + //Integer Exponent: if x is normal or subnormal + + //...shift left to normalize fraction, zero shift if normal + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) ); + sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) ); + + x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) ); + x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) ); + fr = (vec_double2)spu_sel( x1, x0, maskdw ); + fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn ); + fr = spu_sel( fr, x, sign ); + + e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 ); + ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) ); + + frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) ); + *pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) ); + + return frac; +} diff --git a/Extras/simdmathlibrary/spu/frexpf4.c b/Extras/simdmathlibrary/spu/frexpf4.c new file mode 100644 index 000000000..a1c17b335 --- /dev/null +++ b/Extras/simdmathlibrary/spu/frexpf4.c @@ -0,0 +1,47 @@ +/* frexpf4 - for each element of vector x, return the normalized fraction and store the exponent of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector float +frexpf4 (vector float x, vector signed int *pexp) +{ + vec_int4 zeros = spu_splats((int)0); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = spu_splats((int)0x7F800000); + vec_int4 e1 = spu_and((vec_int4)x, expmask); + vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126)); + *pexp = spu_sel(e2, zeros, zeromask); + + vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + + return spu_sel(m2, (vec_float4)zeros, zeromask); +} diff --git a/Extras/simdmathlibrary/spu/hypotd2.c b/Extras/simdmathlibrary/spu/hypotd2.c new file mode 100644 index 000000000..f45580bd0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/hypotd2.c @@ -0,0 +1,40 @@ +/* hypotd2 - for each element of vector x and y, return the square root of (x')^2 + (y')^2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +hypotd2 (vector double x, vector double y) +{ + vec_double2 sum = spu_mul(x,x); + sum = spu_madd(y,y,sum); + + return sqrtd2(sum); +} diff --git a/Extras/simdmathlibrary/spu/hypotf4.c b/Extras/simdmathlibrary/spu/hypotf4.c new file mode 100644 index 000000000..4b7336a78 --- /dev/null +++ b/Extras/simdmathlibrary/spu/hypotf4.c @@ -0,0 +1,40 @@ +/* hypotf4 - for each element of vector x and y, return the square root of (x')^2 + (y')^2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +hypotf4 (vector float x, vector float y) +{ + vec_float4 sum = spu_mul(x,x); + sum = spu_madd(y,y,sum); + + return sqrtf4(sum); +} diff --git a/Extras/simdmathlibrary/spu/ilogbd2.c b/Extras/simdmathlibrary/spu/ilogbd2.c new file mode 100644 index 000000000..28b390f7d --- /dev/null +++ b/Extras/simdmathlibrary/spu/ilogbd2.c @@ -0,0 +1,84 @@ +/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 ((int)0x80000001) +#endif +#ifndef FP_ILOGBNAN +#define FP_ILOGBNAN ((int)0x7FFFFFFF) +#endif + +vector signed long long +ilogbd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, iszeroinf; + vec_llong2 ilogb = (vec_llong2)zero; + vec_llong2 e1, e2; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, lz1; + + //FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, 
expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan ); + + //FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ), + spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) ); + ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf ); + + //Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x + e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); + e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 ); + + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); + lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); + + ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) ); + + return ilogb; +} diff --git a/Extras/simdmathlibrary/spu/ilogbf4.c b/Extras/simdmathlibrary/spu/ilogbf4.c new file mode 100644 index 000000000..e65197f19 --- /dev/null +++ b/Extras/simdmathlibrary/spu/ilogbf4.c @@ -0,0 +1,48 @@ +/* ilogbf4 - for each element of vector x, return integer exponent of x', FP_ILOGBNAN, or FP_ILOGB0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 ((int)0x80000001) +#endif + +vector signed int +ilogbf4 (vector float x) +{ + vec_int4 minus127 = spu_splats((int)-127); + + vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000)); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0); + vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127); + + return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask); +} diff --git a/Extras/simdmathlibrary/spu/irintf4.c b/Extras/simdmathlibrary/spu/irintf4.c new file mode 100644 index 000000000..404bda09f --- /dev/null +++ b/Extras/simdmathlibrary/spu/irintf4.c @@ -0,0 +1,39 @@ +/* irintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding mode. + On SPU, the rounding mode for float is always towards zero. + vector signed int is returned. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector signed int irintf4(vector float in) +{ + return spu_convts(in,0); +} diff --git a/Extras/simdmathlibrary/spu/iroundf4.c b/Extras/simdmathlibrary/spu/iroundf4.c new file mode 100644 index 000000000..e60494330 --- /dev/null +++ b/Extras/simdmathlibrary/spu/iroundf4.c @@ -0,0 +1,55 @@ +/* iroundf4 - for each of four float slots, round to the nearest integer, + halfway cases are rounded away from zero. + + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector signed int iroundf4(vector float in) +{ + vec_int4 exp, out; + vec_uint4 addend; + + /* Add 0.5 (fixed precision) to eliminate rounding issues + */ + exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF)); + + addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp), + spu_cmpgt((vec_uint4)exp, -31)); + + in = (vec_float4)spu_add((vec_uint4)in, addend); + + + /* Truncate the result. + */ + out = spu_convts(in,0); + + return (out); +} diff --git a/Extras/simdmathlibrary/spu/is0denormd2.c b/Extras/simdmathlibrary/spu/is0denormd2.c new file mode 100644 index 000000000..a4b2fa706 --- /dev/null +++ b/Extras/simdmathlibrary/spu/is0denormd2.c @@ -0,0 +1,46 @@ +/* is0denormd2 - for each of two double slots, if input equals 0 or denorm return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +vector unsigned long long +is0denormd2 (vector double x) +{ + vec_double2 xexp; + vec_ullong2 cmp; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + + xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) ); + cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) ); + cmp = spu_shuffle( cmp, cmp, even ); + + return cmp; +} diff --git a/Extras/simdmathlibrary/spu/is0denormf4.c b/Extras/simdmathlibrary/spu/is0denormf4.c new file mode 100644 index 000000000..1d1b4f2d8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/is0denormf4.c @@ -0,0 +1,37 @@ +/* is0denormf4 - for each element of vector x, return a mask of ones if x' is zero or denorm, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +is0denormf4 (vector float x) +{ + return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) ); +} diff --git a/Extras/simdmathlibrary/spu/isequald2.c b/Extras/simdmathlibrary/spu/isequald2.c new file mode 100644 index 000000000..01a451d40 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isequald2.c @@ -0,0 +1,54 @@ +/* isequald2 - for each of two double slots, if x = y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long +isequald2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; + vec_ullong2 bothzero; + + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero), + spu_or( isnand2( x ), isnand2( y ) ) ); +} diff --git a/Extras/simdmathlibrary/spu/isequalf4.c b/Extras/simdmathlibrary/spu/isequalf4.c new file mode 100644 index 000000000..36b147463 --- /dev/null +++ 
b/Extras/simdmathlibrary/spu/isequalf4.c @@ -0,0 +1,37 @@ +/* isequalf4 - for each element of vector x and y, return a mask of ones if x' is equal to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned int +isequalf4 (vector float x, vector float y) +{ + return spu_cmpeq(x, y); +} diff --git a/Extras/simdmathlibrary/spu/isfinited2.c b/Extras/simdmathlibrary/spu/isfinited2.c new file mode 100644 index 000000000..6c3de03d0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isfinited2.c @@ -0,0 +1,47 @@ +/* isfinited2 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long +isfinited2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 cmpr; + + //Finite unless NaN or Inf, check for 'not all-ones exponent' + + cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ); + cmpr = spu_shuffle( cmpr, cmpr, even); + + return cmpr; +} + diff --git a/Extras/simdmathlibrary/spu/isfinitef4.c b/Extras/simdmathlibrary/spu/isfinitef4.c new file mode 100644 index 000000000..50c8cd68d --- /dev/null +++ b/Extras/simdmathlibrary/spu/isfinitef4.c @@ -0,0 +1,40 @@ +/* isfinitef4 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +isfinitef4 (vector float x) +{ + (void)x; + + // NaN, INF not supported on SPU, result always a mask of ones + return spu_splats((unsigned int)0xffffffff); +} diff --git a/Extras/simdmathlibrary/spu/isgreaterd2.c b/Extras/simdmathlibrary/spu/isgreaterd2.c new file mode 100644 index 000000000..8aee1e27e --- /dev/null +++ b/Extras/simdmathlibrary/spu/isgreaterd2.c @@ -0,0 +1,65 @@ +/* isgreaterd2 - for each of two double slots, if x > y return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned long long +isgreaterd2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; + + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); + + return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), + spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); +} + diff --git a/Extras/simdmathlibrary/spu/isgreaterequald2.c b/Extras/simdmathlibrary/spu/isgreaterequald2.c new file mode 100644 index 000000000..4de07d1a9 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isgreaterequald2.c @@ -0,0 +1,67 @@ +/* isgreaterequald2 - for each of two double slots, if x is greater or equal to y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony 
Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned long long +isgreaterequald2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; + + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + cmpeq_ll = spu_or( cmpeq_ll, bothzero); + + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); + + return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ), + spu_or( isnand2 ( x ), isnand2 ( y ) ) ); +} + diff --git a/Extras/simdmathlibrary/spu/isgreaterequalf4.c b/Extras/simdmathlibrary/spu/isgreaterequalf4.c new file mode 100644 index 000000000..886c02e25 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isgreaterequalf4.c @@ -0,0 +1,41 @@ +/* isgreaterequalf4 - for each element of vector x and y, return a mask of ones if x' is greater 
than or equal to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +isgreaterequalf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpgt(y, x); + + return spu_nor(var, var); +} diff --git a/Extras/simdmathlibrary/spu/isgreaterf4.c b/Extras/simdmathlibrary/spu/isgreaterf4.c new file mode 100644 index 000000000..52f049e17 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isgreaterf4.c @@ -0,0 +1,37 @@ +/* isgreaterf4 - for each element of vector x and y, return a mask of ones if x' is greater than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +isgreaterf4 (vector float x, vector float y) +{ + return spu_cmpgt(x, y); +} diff --git a/Extras/simdmathlibrary/spu/isinfd2.c b/Extras/simdmathlibrary/spu/isinfd2.c new file mode 100644 index 000000000..c266cbdb9 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isinfd2.c @@ -0,0 +1,47 @@ +/* isinfd2 - for each of two double slots, if input equals +Inf or -Inf return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector unsigned long long +isinfd2 (vector double x) +{ + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_double2 xabs; + vec_ullong2 cmp; + + xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) ); + cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) ); + + return cmp; +} + diff --git a/Extras/simdmathlibrary/spu/isinff4.c b/Extras/simdmathlibrary/spu/isinff4.c new file mode 100644 index 000000000..bf37bfeb7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isinff4.c @@ -0,0 +1,40 @@ +/* isinff4 - for each element of vector x, return a mask of ones if x' is INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +isinff4 (vector float x) +{ + (void)x; + + // INF not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} diff --git a/Extras/simdmathlibrary/spu/islessd2.c b/Extras/simdmathlibrary/spu/islessd2.c new file mode 100644 index 000000000..7ab81c1de --- /dev/null +++ b/Extras/simdmathlibrary/spu/islessd2.c @@ -0,0 +1,64 @@ +/* islessd2 - for each of two double slots, if x < y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned long long +islessd2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; + + cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x ); + cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x ); + cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); + + return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), + spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); +} diff --git a/Extras/simdmathlibrary/spu/islessequald2.c b/Extras/simdmathlibrary/spu/islessequald2.c new file mode 100644 index 000000000..f09f245fb --- /dev/null +++ b/Extras/simdmathlibrary/spu/islessequald2.c @@ -0,0 +1,66 @@ +/* islessequald2 - for each of two double slots, if x <= y return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned long long +islessequald2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; + + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + cmpeq_ll = spu_or( cmpeq_ll, bothzero); + + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); + + return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll), + spu_or( isnand2 ( x ), isnand2 ( y ) ) ); +} diff --git a/Extras/simdmathlibrary/spu/islessequalf4.c b/Extras/simdmathlibrary/spu/islessequalf4.c new file mode 100644 index 000000000..cf3459fa7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/islessequalf4.c @@ -0,0 +1,41 @@ +/* islessequalf4 - for each element of vector x and y, return a mask of ones if x' is less than or equal to y', 
zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +islessequalf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpgt(x, y); + + return spu_nor(var, var); +} diff --git a/Extras/simdmathlibrary/spu/islessf4.c b/Extras/simdmathlibrary/spu/islessf4.c new file mode 100644 index 000000000..55921dd53 --- /dev/null +++ b/Extras/simdmathlibrary/spu/islessf4.c @@ -0,0 +1,37 @@ +/* islessf4 - for each element of vector x and y, return a mask of ones if x' is less than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +islessf4 (vector float x, vector float y) +{ + return spu_cmpgt(y, x); +} diff --git a/Extras/simdmathlibrary/spu/islessgreaterd2.c b/Extras/simdmathlibrary/spu/islessgreaterd2.c new file mode 100644 index 000000000..89d4b90e7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/islessgreaterd2.c @@ -0,0 +1,55 @@ +/* islessgreaterd2 - for each of two double slots, if x is less or greater than y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long +islessgreaterd2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; + vec_ullong2 bothzero; + + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); + + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd), + spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); +} + diff --git a/Extras/simdmathlibrary/spu/islessgreaterf4.c b/Extras/simdmathlibrary/spu/islessgreaterf4.c new file mode 100644 index 000000000..65ee77e20 --- 
/dev/null +++ b/Extras/simdmathlibrary/spu/islessgreaterf4.c @@ -0,0 +1,41 @@ +/* islessgreaterf4 - for each element of vector x and y, return a mask of ones if x' is less than or greater than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +islessgreaterf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpeq(x, y); + + return spu_nor(var, var); +} diff --git a/Extras/simdmathlibrary/spu/isnand2.c b/Extras/simdmathlibrary/spu/isnand2.c new file mode 100644 index 000000000..12e7c3e77 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isnand2.c @@ -0,0 +1,52 @@ +/* isnand2 - for each of two double slots, if input is any type of NaN return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long +isnand2 (vector double x) +{ + vec_double2 xneg; + vec_ullong2 cmpgt, cmpeq, cmpnan; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull); + + xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask ); + cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask ); + + cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + + return cmpnan; +} + diff --git a/Extras/simdmathlibrary/spu/isnanf4.c b/Extras/simdmathlibrary/spu/isnanf4.c new file mode 100644 index 000000000..39827b148 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isnanf4.c @@ -0,0 +1,40 @@ +/* isnanf4 - for each element of vector x, return a mask of ones if x' is NaN, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +vector unsigned int +isnanf4 (vector float x) +{ + (void)x; + + // NaN not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} diff --git a/Extras/simdmathlibrary/spu/isnormald2.c b/Extras/simdmathlibrary/spu/isnormald2.c new file mode 100644 index 000000000..ae2897402 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isnormald2.c @@ -0,0 +1,49 @@ +/* isnormald2 - for each element of vector x, return a mask of ones if x' is normal, not a NaN or INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector unsigned long long +isnormald2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 cmpr; + + //Normal unless nan, infinite, denorm, or zero + + //Check for 'not zero or all-ones exponent' + cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ), + spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) ); + cmpr = spu_shuffle( cmpr, cmpr, even); + + return cmpr; +} + diff --git a/Extras/simdmathlibrary/spu/isnormalf4.c b/Extras/simdmathlibrary/spu/isnormalf4.c new file mode 100644 index 000000000..a49fb695d --- /dev/null +++ b/Extras/simdmathlibrary/spu/isnormalf4.c @@ -0,0 +1,38 @@ +/* isnormalf4 - for each element of vector x, return a mask of ones if x' is normal, not a NaN or INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +isnormalf4 (vector float x) +{ + // NaN, INF not supported on SPU; normal unless zero + return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000)); +} diff --git a/Extras/simdmathlibrary/spu/isunorderedd2.c b/Extras/simdmathlibrary/spu/isunorderedd2.c new file mode 100644 index 000000000..ffcb3369a --- /dev/null +++ b/Extras/simdmathlibrary/spu/isunorderedd2.c @@ -0,0 +1,63 @@ +/* isunorderedd2 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long +isunorderedd2 (vector double x, vector double y) +{ + vec_double2 neg; + vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull); + vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull); + + //Check if x is nan + neg = (vec_double2)spu_or( (vec_ullong2)x, sign ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); + cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); + + cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + + //Check if y is nan + neg = (vec_double2)spu_or( (vec_ullong2)y, sign ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); 
+ cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); + + cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + + return spu_or( cmpnanx, cmpnany ); +} + diff --git a/Extras/simdmathlibrary/spu/isunorderedf4.c b/Extras/simdmathlibrary/spu/isunorderedf4.c new file mode 100644 index 000000000..e09df12e5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/isunorderedf4.c @@ -0,0 +1,41 @@ +/* isunorderedf4 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +isunorderedf4 (vector float x, vector float y) +{ + (void)x; + (void)y; + + // NaN not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} diff --git a/Extras/simdmathlibrary/spu/ldexpd2.c b/Extras/simdmathlibrary/spu/ldexpd2.c new file mode 100644 index 000000000..9fa555616 --- /dev/null +++ b/Extras/simdmathlibrary/spu/ldexpd2.c @@ -0,0 +1,263 @@ +/* ldexpd2 - Multiply Double by 2 Raised to its Power + For large elements of ex (overflow), returns HUGE_VALF + For small elements of ex (underflow), returns 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +ldexpd2(vector double x, vector signed long long ex) +{ + vec_int4 e1, e2; + vec_int4 min = spu_splats(-2099); +// vec_int4 min = spu_splats(-2044); + vec_int4 max = spu_splats( 2098); +// vec_int4 max = spu_splats( 2046); + vec_uint4 cmp_min, cmp_max; + vec_uint4 shift = ((vec_uint4){20, 32, 20, 32}); + vec_double2 f1, f2; + vec_double2 out; + vec_double2 in = x; + vec_int4 exp_in; + + // check input data range + vec_int4 exp0 = spu_shuffle( (vec_int4)ex, (vec_int4)ex, ((vec_uchar16){4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15})); + vec_int4 dmy = spu_shuffle( (vec_int4)spu_splats(0x10000), (vec_int4)ex, ((vec_uchar16){16,1,2,3, 16,1,2,3, 24,1,2,3,24,1,2,3})); + // (-)0xFFFFFFFF80000000 or (+)0x000000007FFFFFFF + vec_int4 msk_range = ((vec_int4){0,0x80000000, 0,0x80000000}); + vec_int4 inrange = spu_addx( (vec_int4)ex, msk_range, spu_rlqwbyte(spu_genc((vec_int4)ex, msk_range), 4)); + inrange = (vec_int4)spu_cmpeq( inrange, 0 ); + inrange = spu_shuffle(inrange,inrange,((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + // select dummy over ranged data or input data + vec_int4 exp = spu_sel( dmy, exp0, (vec_uint4)inrange); + exp_in = exp; + /* Clamp 
the specified exponent to the range -2044 to 2046. + */ + cmp_min = spu_cmpgt(exp, min); + cmp_max = spu_cmpgt(exp, max); + exp = spu_sel(min, exp, cmp_min); + exp = spu_sel(exp, max, cmp_max); + + /* Generate the factors f1 = 2^e1 and f2 = 2^e2 + */ + e1 = spu_rlmaska(exp, -1); + e2 = spu_sub(exp, e1); + + f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift); + + vec_double2 otmp = spu_mul(x, f1); + vec_uint4 fpscr1 = spu_mffpscr(); + + f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift); + + out = spu_mul(otmp, f2); + vec_uint4 fpscr2 = spu_mffpscr(); + + /* Compute the product x * 2^e1 * 2^e2 + */ +// out = spu_mul(spu_mul(x, f1), f2); + + // check floating point register DENORM bit + vec_uint4 fpscr0, fpscr; + fpscr0 = spu_or(fpscr1, fpscr2); + fpscr = spu_shuffle(fpscr0, fpscr0, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,10,0x80,0x80,0x80,6,0x80,0x80,0x80,0x80,0x80})); + fpscr = spu_or(fpscr0, fpscr); + if ( __builtin_expect(spu_extract(fpscr, 1) == 0, 1) ) return out; + + + ////////////////////// + // Denormalized calc// + ////////////////////// + + vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}; + vec_uint4 signmask = ((vec_uint4){0x80000000,0,0x80000000,0}); + vec_int4 zeros = spu_splats(0); + vec_uchar16 msk_64_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}); + + //check input was zero + vec_uint4 x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1})); + vec_uint4 x_zero = spu_cmpeq( x_body, (vec_uint4)zeros ); + x_zero = spu_and( x_zero, spu_shuffle(x_zero,x_zero,msk_64_eq)); + + // check Denormalized input + vec_int4 cnt_zero = (vec_int4)spu_cntlz(x_body); + vec_uint4 is_den = (vec_uint4)spu_cmpgt(cnt_zero, 11); // Denormalized data 000XXXXX XXXXXXXX + is_den = spu_shuffle( is_den, is_den, splat_msb); + is_den = spu_sel(is_den, (vec_uint4)zeros, x_zero); // exclude zero from denormalized + + // count 0bits for 64bit + vec_uint4 cnt_ex = (vec_uint4)spu_cmpgt(cnt_zero, 31); // Denormalized data 00000000 XXXXXXXX 
+ vec_int4 cnt_z = spu_shuffle( cnt_zero, cnt_zero, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})); + cnt_zero = spu_add(cnt_zero, spu_sel(zeros, cnt_z, cnt_ex)); + cnt_zero = spu_shuffle(cnt_zero, cnt_zero, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + // extract each 64bit data + x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x000FFFFF,-1,0x000FFFFF,-1})); + vec_uint4 mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)exp_in, -31); + sign = spu_shuffle(sign, sign, splat_msb); + + // set max shift count + vec_int4 sht = spu_add( cnt_zero, ((vec_int4){-11,-64,-11,-64})); + + // denorm & exp+ shift left + vec_uint4 cmp = spu_cmpgt( sht, exp_in); + vec_int4 sht_l = spu_sel(sht, exp_in, cmp); + int shtl0 = spu_extract(sht_l, 0); + int shtl1 = spu_extract(sht_l, 2); + vec_uint4 mant0l = spu_slqwbytebc( spu_slqw(mant0, shtl0), shtl0 ); + vec_uint4 mant1l = spu_slqwbytebc( spu_slqw(mant1, shtl1), shtl1 ); + vec_int4 expp = spu_shuffle(spu_sub(exp_in, sht_l), zeros, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + exp0 = spu_sel( expp, exp_in, sign ); // select plus or minus caluc + vec_uint4 mantl = spu_shuffle( mant0l, mant1l, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23})); + vec_uint4 mant = spu_sel( mantl, (vec_uint4)x, sign); + exp = spu_sel( exp_in, exp0, is_den ); // select denormalized + x = (vec_double2)spu_sel( (vec_uint4)x, mant, is_den); + + + ////////////////////////////////////////////////////////////////////////// + // from ldexpf4 + vec_int4 expmask = ((vec_int4){0x7FF00000, 0, 0x7FF00000, 0}); + e1 = spu_and((vec_int4)x, expmask); + e2 = spu_rlmask(e1,-20); + + vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 2046); + vec_uchar16 minmask = 
(vec_uchar16)spu_cmpgt(spu_splats(-2044), exp); + minmask = spu_or (minmask, (vec_uchar16)x_zero); + + vec_int4 esum = spu_add(e2, exp); + + maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046)); + maxmask = spu_shuffle(maxmask, maxmask, splat_msb); +// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL))); + minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum)); + minmask = spu_shuffle(minmask, minmask, splat_msb); + + // check denorm + vec_uint4 mxmask = spu_and(spu_cmpgt(e2, 0), ((vec_uint4){0x00100000,0,0x00100000,0})); // not denorm + vec_int4 esum2 = spu_sub(esum, (vec_int4)spu_rlmask(mxmask, -20)); // reverse to norm + vec_uint4 mrange = spu_and(spu_cmpgt(zeros, esum2), spu_cmpgt(esum2, -55)); // denorm range + mrange = spu_shuffle(mrange, mrange, splat_msb); + + vec_int4 sht_r = spu_sel(spu_splats(-54), esum2, spu_cmpgt(esum2, spu_splats(-54)) ); + vec_int4 sht_rh = spu_add( sht_r, ((vec_int4){7,7,7,7})); + + x_body = spu_or( x_body, mxmask ); + mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) ); + vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) ); + +#ifdef LDEXPD2_ROUND + // check current round mode + fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0})); + fpscr1 = spu_and(fpscr, ((vec_uint4){0,0x300,0,0})); + + // prepare round data + vec_uint4 rnd0 = spu_slqwbytebc( spu_slqw( mant0r, 31), 31); + vec_uint4 rnd1 = spu_slqwbytebc( spu_slqw( mant1r, 31), 31); + vec_uint4 rnd0w = (vec_uint4)spu_cntb( (vec_uchar16)rnd0 ); + vec_uint4 rnd1w = 
(vec_uint4)spu_cntb( (vec_uchar16)rnd1 ); + rnd0w = spu_or( spu_slqwbyte(rnd0w,4), spu_slqwbyte(rnd0w,8)); + rnd1w = spu_or( spu_slqwbyte(rnd1w,4), spu_slqwbyte(rnd1w,8)); + rnd0 = spu_or( rnd0, rnd0w); + rnd1 = spu_or( rnd1, rnd1w); + + // nearest + // check half + vec_uint4 hit0 = spu_cmpeq(rnd0, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out + vec_uint4 hit1 = spu_cmpeq(rnd1, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out + vec_uint4 add0 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit0); + vec_uint4 add1 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit1); + // check greater than half + rnd0 = spu_and( rnd0, ((vec_uint4){0,0x7FFFFFFF,0,0})); + rnd1 = spu_and( rnd1, ((vec_uint4){0,0x7FFFFFFF,0,0})); + hit0 = spu_cmpgt(rnd0, ((vec_uint4){0,0x40000000,0,0})); + hit1 = spu_cmpgt(rnd1, ((vec_uint4){0,0x40000000,0,0})); + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + // select if fp0 + add0 = spu_sel((vec_uint4)zeros, add0, spu_cmpeq(fpscr0, (vec_uint4)zeros)); + add1 = spu_sel((vec_uint4)zeros, add1, spu_cmpeq(fpscr1, (vec_uint4)zeros)); + + // toward zero do nothing + // upward + sign = spu_rlmaska((vec_uint4)in, -31); + vec_uint4 sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,0,0,0,0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 sign1 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,8,8,8,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 hit0w = spu_cmpgt(rnd0, ((vec_uint4){0,0,0,0})); + vec_uint4 hit1w = spu_cmpgt(rnd1, ((vec_uint4){0,0,0,0})); + + hit0 = spu_andc(hit0w, sign0); + hit1 = spu_andc(hit1w, sign1); + hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0x800,0,0}))); + hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x200,0,0}))); + // select if fp2 + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + + // downward + hit0 = spu_and(hit0w, sign0); + hit1 
= spu_and(hit1w, sign1); + hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0xc00,0,0}))); + hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x300,0,0}))); + // select if fp3 + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + + // calc round + mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4)); + mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4)); + +#endif // LDEXPD2_ROUND + + vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23})); + + // select right answer + x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask); + x = spu_sel(x, (vec_double2)zeros, minmask); + x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask); + + out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange); + + // check Infinity,NaN + vec_uint4 is_inf = spu_cmpeq(e1, expmask); + is_inf = spu_and( is_inf, spu_shuffle(is_inf,is_inf,msk_64_eq)); + out = (vec_double2)spu_sel((vec_uint4)out , (vec_uint4)in, is_inf); + + out = spu_sel(out, in, (vec_ullong2)signmask); + return out; +} + + diff --git a/Extras/simdmathlibrary/spu/ldexpf4.c b/Extras/simdmathlibrary/spu/ldexpf4.c new file mode 100644 index 000000000..1f5bffcac --- /dev/null +++ b/Extras/simdmathlibrary/spu/ldexpf4.c @@ -0,0 +1,56 @@ +/* ldexpf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +vector float +ldexpf4 (vector float x, vector signed int exp) +{ + vec_int4 zeros = spu_splats(0); + + vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000); + vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask); + vec_int4 e2 = spu_rlmask(e1,-23); + + vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255); + vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp); + minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros)); + + vec_int4 esum = spu_add(e2, exp); + + maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255)); + maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF)); + minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum)); + + x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask); + x = spu_sel(x, (vec_float4)zeros, minmask); + //x = spu_sel(x, (vec_float4)spu_splats((int)0xFFFFFFFF), maxmask); + x = spu_sel(x, (vec_float4)maxmask, maxmask); + return x; +} diff --git 
a/Extras/simdmathlibrary/spu/llabsi2.c b/Extras/simdmathlibrary/spu/llabsi2.c new file mode 100644 index 000000000..14297f3cd --- /dev/null +++ b/Extras/simdmathlibrary/spu/llabsi2.c @@ -0,0 +1,45 @@ +/* llabsi2 - returns absolute value of input. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector signed long long +llabsi2 (vector signed long long in) +{ + vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); + sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + + vec_uint4 add_1 = ((vec_uint4){0,1,0,1}); + vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in); + res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4)); + res = spu_sel( (vec_uint4)in, res, sign); + + return ((vec_llong2)(res)); +} diff --git a/Extras/simdmathlibrary/spu/lldiv.h b/Extras/simdmathlibrary/spu/lldiv.h new file mode 100644 index 000000000..f3c990df4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/lldiv.h @@ -0,0 +1,123 @@ +/* Common functions for lldivi2/lldivu2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LLDIV_H__ +#define __LLDIV_H__ + +#include + +static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x); +static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count); +static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count); +static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x); +static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y); +static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y); + +static inline vector unsigned long long +ll_spu_cntlz(vector unsigned long long x) +{ + vec_uint4 cnt; + + cnt = spu_cntlz((vec_uint4)x); + cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4))); + cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11})); + + return (vec_ullong2)cnt; +} + +static inline vector unsigned long long +ll_spu_sl(vector unsigned long long x, vector unsigned long long count) +{ + vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; + vec_ullong2 x_upper, x_lower; + + // shift upper word + x_upper = spu_and(x, mask); + x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1)); + x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1)); + + // shift 
lower word + x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3)); + x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3)); + + return spu_sel(x_lower, x_upper, mask); +} + +static inline vector unsigned long long +ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count) +{ + vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; + vec_ullong2 x_upper, x_lower; + vec_uint4 cnt_byte; + + cnt_byte = spu_add((vec_uint4)count, 7); + + // shift upper word + x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1)); + x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1)); + + // shift lower word + x_lower = spu_andc(x, mask); + x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3)); + x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3)); + + return spu_sel(x_lower, x_upper, mask); +} + +static inline vector unsigned long long +ll_spu_cmpeq_zero(vector unsigned long long x) +{ + vec_uint4 cmp; + + cmp = spu_cmpeq((vec_uint4)x, 0); + return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}))); +} + +static inline vector unsigned long long +ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y) +{ + vec_uint4 gt; + + gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y); + gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y)); + return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11})); +} + +static inline vector unsigned long long +ll_spu_sub(vector unsigned long long x, vector unsigned long long y) +{ + vec_uint4 borrow; + + borrow = spu_genb((vec_uint4)x, (vec_uint4)y); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); + return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow); +} + +#endif // __LLDIV_H__ + diff --git a/Extras/simdmathlibrary/spu/lldivi2.c b/Extras/simdmathlibrary/spu/lldivi2.c 
new file mode 100644 index 000000000..a24ff5f5d --- /dev/null +++ b/Extras/simdmathlibrary/spu/lldivi2.c @@ -0,0 +1,128 @@ +/* lldivi2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include "lldiv.h" + +static inline vector signed long long _negatell2 (vector signed long long x); + +static inline vector signed long long +_negatell2 (vector signed long long x) +{ + vector signed int zero = (vector signed int){0,0,0,0}; + vector signed int borrow; + + borrow = spu_genb(zero, (vec_int4)x); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); + return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow); +} + +// lldivi2 - for each of two signed long long interger slots, compute quotient and remainder of +// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator. + +lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom) +{ + lldivi2_t res; + vec_ullong2 numerAbs, denomAbs; + vec_uint4 numerPos, denomPos, quotNeg; + + vec_uint4 denomZeros, numerZeros; + vec_int4 shift; + vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft; + vec_ullong2 quot, newQuot; + vec_ullong2 newNum, skip, cont; + int anyCont; + + // Determine whether result needs sign change + + numerPos = spu_cmpgt((vec_int4)numer, -1); + numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + denomPos = spu_cmpgt((vec_int4)denom, -1); + denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + quotNeg = spu_xor( numerPos, denomPos ); + + // Use absolute values of numerator, denominator + + numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos); + denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos); + + // Get difference of leading zeros. 
+ + denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs ); + numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs ); + + shift = (vec_int4)spu_sub( denomZeros, numerZeros ); + + // Shift denom to align leading one with numerator's + + denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift ); + oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift ); + oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) ); + + // Shift left all leading zeros. + + denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros ); + oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros ); + + quot = spu_splats(0ull); + + do + { + cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) ); + anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 ); + + newQuot = spu_or( quot, oneShifted ); + + // Subtract shifted denominator from remaining numerator + // when denominator is not greater. + + skip = ll_spu_cmpgt( denomShifted, numerAbs ); + newNum = ll_spu_sub( numerAbs, denomShifted ); + + // If denominator is greater, next shift is one more, otherwise + // next shift is number of leading zeros of remaining numerator. 
+ + numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip ); + shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros ); + + oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift ); + denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift ); + + quot = spu_sel( newQuot, quot, skip ); + numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) ); + } + while ( anyCont ); + + res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg); + res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos); + + return res; +} + diff --git a/Extras/simdmathlibrary/spu/lldivu2.c b/Extras/simdmathlibrary/spu/lldivu2.c new file mode 100644 index 000000000..864d11191 --- /dev/null +++ b/Extras/simdmathlibrary/spu/lldivu2.c @@ -0,0 +1,98 @@ +/* lldivu2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> +#include "lldiv.h" + +// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of +// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator. + +lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom) +{ + lldivu2_t res; + vec_uint4 denomZeros, numerZeros; + vec_int4 shift; + vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft; + vec_ullong2 quot, newQuot; + vec_ullong2 newNum, skip, cont; + int anyCont; + + // Get difference of leading zeros. + + denomZeros = (vec_uint4)ll_spu_cntlz( denom ); + numerZeros = (vec_uint4)ll_spu_cntlz( numer ); + + shift = (vec_int4)spu_sub( denomZeros, numerZeros ); + + // Shift denom to align leading one with numerator's + + denomShifted = ll_spu_sl( denom, (vec_ullong2)shift ); + oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift ); + oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) ); + + // Shift left all leading zeros. 
+ + denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros ); + oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros ); + + quot = spu_splats(0ull); + + do + { + cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) ); + anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 ); + + newQuot = spu_or( quot, oneShifted ); + + // Subtract shifted denominator from remaining numerator + // when denominator is not greater. + + skip = ll_spu_cmpgt( denomShifted, numer ); + newNum = ll_spu_sub( numer, denomShifted ); + + // If denominator is greater, next shift is one more, otherwise + // next shift is number of leading zeros of remaining numerator. + + numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip ); + shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros ); + + oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift ); + denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift ); + + quot = spu_sel( newQuot, quot, skip ); + numer = spu_sel( newNum, numer, spu_orc(skip,cont) ); + } + while ( anyCont ); + + res.quot = quot; + res.rem = numer; + return res; +} + diff --git a/Extras/simdmathlibrary/spu/llrintd2.c b/Extras/simdmathlibrary/spu/llrintd2.c new file mode 100644 index 000000000..155ec3490 --- /dev/null +++ b/Extras/simdmathlibrary/spu/llrintd2.c @@ -0,0 +1,110 @@ +/* llrintd2 - rounds two doubles in to two nearest 64bit integer. + consistent with the current rounding mode. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +// +// Handles no exception +// over flow will return unspecified data + +vector signed long long +llrintd2 (vector double in) +{ + int shift0, shift1; + vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}); + vec_int4 exp; + vec_uint4 mant, mant0, mant1, sign, mask, borrow; + vec_uint4 implied_one = ((vec_uint4){ 0, 0, 0x00100000, 0}); + vec_uint4 exp_mask = ((vec_uint4){-1,-1, 0xFFF00000, 0}); + vec_double2 bias; + + vec_uint4 vec_zero = ((vec_uint4){0,0,0,0}); + // check denormalized + vec_uint4 exp_in = spu_and( (vec_uint4)in, 0x7FF00000 ); + vec_uint4 is_denorm = spu_cmpeq( exp_in, 0 ); + vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm); + + // check zero + vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1})); + vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero); + is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}))); + ofs = spu_sel( ofs, vec_zero, is_zerox); + + vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs ); + + /* Round the input according to the current rounding mode. + */ + vec_uint4 is_large = spu_cmpgt( exp_in, 0x43200000 ); + is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8})); + bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large); + bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL)); + +// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx, +// (vec_ullong2)spu_splats(0x8000000000000000ULL)); + mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias)); + + /* Determine how many bits to shift the mantissa to correctly + * align it into long long element 0. 
+ */ + exp = spu_and(spu_rlmask((vec_int4)mant, -20), 0x7FF); + exp = spu_add(exp, -1011); + shift0 = spu_extract(exp, 0); + shift1 = spu_extract(exp, 2); + + mask = spu_cmpgt(exp, 0); + mask = spu_shuffle(mask, mask, splat_msb); + + /* Algn mantissa bits + */ + mant0 = spu_sel(spu_rlmaskqwbyte(mant, -8), implied_one, exp_mask); + mant1 = spu_sel(mant, implied_one, exp_mask); + + mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0); + mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1); + + mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + mant = spu_and(mant, mask); + + /* Compute the two's complement of the mantissa if the + * input is negative. + */ + sign = (vec_uint4)spu_rlmaska((vec_int4)xx, -31); + sign = spu_shuffle(sign, sign, splat_msb); + + mant = spu_xor(mant, sign); + borrow = spu_genb(mant, sign); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){ + 4,5,6,7, 192,192,192,192, + 12,13,14,15, 192,192,192,192})); + mant = spu_subx(mant, sign, borrow); + + return ((vec_llong2)(mant)); +} diff --git a/Extras/simdmathlibrary/spu/llrintf4.c b/Extras/simdmathlibrary/spu/llrintf4.c new file mode 100644 index 000000000..a9d24c139 --- /dev/null +++ b/Extras/simdmathlibrary/spu/llrintf4.c @@ -0,0 +1,102 @@ +/* llrintf4 - rounds four floats in to four nearest 64bit integer. + On SPU the rounding mode for floats is always towards 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +// +// Handles no exception +// over flow will return unspecified data + +llroundf4_t +llrintf4 (vector float in) +{ + llroundf4_t res; + vec_int4 exp; + vec_uint4 mant0, mant1, mant2, mant3; + vec_uint4 mask, mask0, mask1; + vec_uint4 sign, sign0, sign1; + vec_uint4 borrow0, borrow1; + vec_uint4 res0, res1; + int shift0, shift1, shift2, shift3; + + /* Place mantissa bits (including implied most significant + * bit) into the most significant bits of element 3. Elements + * 0, 1, and 2 are zeroed. 
+ */ + mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + + /* Determine how many bits to shift the mantissa to correctly + * align it into long long element 0. + */ + exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF); + exp = spu_add(exp, -94); + shift0 = spu_extract(exp, 0); + shift1 = spu_extract(exp, 1); + shift2 = spu_extract(exp, 2); + shift3 = spu_extract(exp, 3); + + /* Algn mantissa bits + */ + mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0); + mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1); + mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2); + mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3); + + mask = spu_cmpgt(exp, 0); + mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4})); + mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12})); + + res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + res0 = spu_and(res0, mask0); + res1 = spu_and(res1, mask1); + + /* Compute the two's complement of the mantissa if the + * input is negative. 
+ */ + sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); + sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4})); + sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12})); + + res0 = spu_xor(res0, sign0); + res1 = spu_xor(res1, sign1); + borrow0 = spu_genb(res0, sign0); + borrow1 = spu_genb(res1, sign1); + borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0})); + borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0})); + res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0); + res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1); + + return res; +} diff --git a/Extras/simdmathlibrary/spu/llroundd2.c b/Extras/simdmathlibrary/spu/llroundd2.c new file mode 100644 index 000000000..eaaab6e74 --- /dev/null +++ b/Extras/simdmathlibrary/spu/llroundd2.c @@ -0,0 +1,92 @@ +/* llroundd2 - rounds two doubles in to two nearest 64bit integer. + 0.5 will be rounded to far from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +// +// Handles no exception +// over flow will return unspecified data + +vector signed long long +llroundd2 (vector double in) +{ + int shift0, shift1; + vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}; + vec_int4 exp; + vec_uint4 mant, mant0, mant1, sign, mask, borrow, addend; + vec_uint4 implied_one = { 0, 0, 0x00100000, 0}; + vec_uint4 exp_mask = { -1, -1,0xFFF00000, 0}; + + /* Determine how many bits to shift the mantissa to correctly + * align it into long long element 0. 
+ */ + exp = spu_and(spu_rlmask((vec_int4)in, -20), 0x7FF); + exp = spu_add(exp, -1011); + shift0 = spu_extract(exp, 0); + shift1 = spu_extract(exp, 2); + + mask = spu_cmpgt(exp, 0); + mask = spu_shuffle(mask, mask, splat_msb); + + /* Algn mantissa bits + */ + mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -8), implied_one, exp_mask); + mant1 = spu_sel((vec_uint4)in, implied_one, exp_mask); + + mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0); + mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1); + + mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + mant = spu_and(mant, mask); + + /* Perform round by adding 1 if the fraction bits are + * greater than or equal to .5 + */ + addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24})); + addend = spu_rlmask(addend, -7); +// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1})); + mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4)); + + /* Compute the two's complement of the mantissa if the + * input is negative. + */ + sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); + sign = spu_shuffle(sign, sign, splat_msb); + + mant = spu_xor(mant, sign); + borrow = spu_genb(mant, sign); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){ + 4,5,6,7, 192,192,192,192, + 12,13,14,15, 192,192,192,192})); + mant = spu_subx(mant, sign, borrow); + + return ((vec_llong2)(mant)); +} diff --git a/Extras/simdmathlibrary/spu/llroundf4.c b/Extras/simdmathlibrary/spu/llroundf4.c new file mode 100644 index 000000000..f4f89dae2 --- /dev/null +++ b/Extras/simdmathlibrary/spu/llroundf4.c @@ -0,0 +1,115 @@ +/* llroundf4 - rounds four floats in to four nearest 64bit integer. + 0.5 will be rounded to far from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +// +// Handles no exception +// over flow will return unspecified data + +llroundf4_t +llroundf4 (vector float in) +{ + llroundf4_t res; + vec_int4 exp; + vec_uint4 mant0, mant1, mant2, mant3; + vec_uint4 mask, mask0, mask1; + vec_uint4 sign, sign0, sign1; + vec_uint4 addend0, addend1; + vec_uint4 borrow0, borrow1; + vec_uint4 res0, res1; + int shift0, shift1, shift2, shift3; + + /* Place mantissa bits (including implied most significant + * bit) into the most significant bits of element 3. Elements + * 0, 1, and 2 are zeroed. + */ + mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF})); + + /* Determine how many bits to shift the mantissa to correctly + * align it into long long element 0. 
+ */ + exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF); + exp = spu_add(exp, -94); + shift0 = spu_extract(exp, 0); + shift1 = spu_extract(exp, 1); + shift2 = spu_extract(exp, 2); + shift3 = spu_extract(exp, 3); + + /* Algn mantissa bits + */ + mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0); + mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1); + mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2); + mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3); + + mask = spu_cmpgt(exp, 0); + mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4})); + mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12})); + + res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23})); + res0 = spu_and(res0, mask0); + res1 = spu_and(res1, mask1); + + /* Perform round by adding 1 if the fraction bits are + * greater than or equal to .5 + */ + addend0 = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24})); + addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24})); + addend0 = spu_rlmask(addend0, -7); + addend1 = spu_rlmask(addend1, -7); +// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1})); +// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1})); + res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4)); + res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4)); + + /* Compute the two's complement of the mantissa if the + * input is negative. 
+ */ + sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); + sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4})); + sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12})); + + res0 = spu_xor(res0, sign0); + res1 = spu_xor(res1, sign1); + borrow0 = spu_genb(res0, sign0); + borrow1 = spu_genb(res1, sign1); + borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0})); + borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0})); + res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0); + res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1); + + return res; +} diff --git a/Extras/simdmathlibrary/spu/log10f4.c b/Extras/simdmathlibrary/spu/log10f4.c new file mode 100644 index 000000000..b3ce42112 --- /dev/null +++ b/Extras/simdmathlibrary/spu/log10f4.c @@ -0,0 +1,79 @@ +/* log10f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> + + +#define _LOG10F_H_loga2msb ((float)0.3010299205780f) +#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f) +#define _LOG10F_H_logaemsb ((float)0.4342944622040f) +#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f) +#define _LOG10F_H_logae ((float)0.4342944819033f) + +#define _LOG10F_H_c0 ((float)(0.2988439998f)) +#define _LOG10F_H_c1 ((float)(0.3997655209f)) +#define _LOG10F_H_c2 ((float)(0.6666679125f)) + +vector float +log10f4 (vector float x) +{ + vec_int4 zeros = spu_splats((int)0); + vec_float4 ones = spu_splats(1.0f); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = spu_splats((int)0x7F800000); + vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + + vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + x = spu_sel(x , spu_add(x, x) , mask); + xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); + + vec_float4 x1 = spu_sub(x , ones); + vec_float4 z = divf4 (x1, spu_add(x, ones)); + vec_float4 w = spu_mul(z , z); + vec_float4 polyw; + polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, 
spu_splats(_LOG10F_H_c1)); + polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2)); + + vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); + vec_float4 wnew = spu_convtf(xexp,0); + + vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1, + spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew)); + vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1, + spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew, + spu_mul(spu_splats(_LOG10F_H_logae), yneg)) + ); + + return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); +} + + diff --git a/Extras/simdmathlibrary/spu/log1pf4.c b/Extras/simdmathlibrary/spu/log1pf4.c new file mode 100644 index 000000000..cab4a959a --- /dev/null +++ b/Extras/simdmathlibrary/spu/log1pf4.c @@ -0,0 +1,51 @@ +/* log1pf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath.h> +#include <spu_intrinsics.h> +vector float +log1pf4 (vector float x) +{ + vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)), + spu_cmpgt(spu_splats(0.5f), x)); + vec_float4 x2 = spu_mul(x,x); + vec_float4 d0, d1, n0, n1; + + d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f)); + d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f)); + d1 = spu_madd(x2, d1, d0); + + n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f)); + n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f)); + n1 = spu_madd(x2, n1, n0); + + return spu_sel(logf4(spu_add(x, spu_splats(1.0f))), + spu_mul(x, divf4(n1, d1)), + nearzeromask); +} diff --git a/Extras/simdmathlibrary/spu/log2f4.c b/Extras/simdmathlibrary/spu/log2f4.c new file mode 100644 index 000000000..336d2b8a0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/log2f4.c @@ -0,0 +1,71 @@ +/* log2f4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#define _LOG2F_H_l2emsb ((float)1.4426950216293f) +#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f) +#define _LOG2F_H_l2e ((float)1.4426950408890f) + +#define _LOG2F_H_c0 ((float)(0.2988439998f)) +#define _LOG2F_H_c1 ((float)(0.3997655209f)) +#define _LOG2F_H_c2 ((float)(0.6666679125f)) + +vector float +log2f4 (vector float x) +{ + vec_int4 zeros = spu_splats((int)0); + vec_float4 ones = spu_splats(1.0f); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = spu_splats((int)0x7F800000); + vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + + + vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + x = spu_sel(x , spu_add(x, x) , mask); + xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); + + vec_float4 x1 = spu_sub(x , ones); + vec_float4 z = divf4(x1, spu_add(x, ones)); + vec_float4 w = spu_mul(z , z); + vec_float4 polyw; + polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1)); + polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2)); + + vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); + vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0)); + vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1, + spu_mul(spu_splats(_LOG2F_H_l2e), yneg) + ); + + return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); +} diff --git a/Extras/simdmathlibrary/spu/logbd2.c b/Extras/simdmathlibrary/spu/logbd2.c new file mode 100644 index 000000000..d566eeb3f --- /dev/null +++ b/Extras/simdmathlibrary/spu/logbd2.c @@ -0,0 +1,93 @@ +/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#ifndef HUGE_VALL +#define HUGE_VALL __builtin_huge_vall () +#endif + +#ifndef DBL_INF +#define DBL_INF ((long long)0x7FF0000000000000ull) +#endif + +#ifndef DBL_NAN +#define DBL_NAN ((long long)0x7FF8000000000000ull) +#endif + +vector double +logbd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, isinf, iszero; + vec_double2 logb = (vec_double2)zero; + vec_llong2 e1, e2; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, lz1; + + //NAN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan ); + + //INF: x is infinite (all-ones exponent and zero mantissa) + isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf ); + + //HUGE_VAL: x is zero (zero exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); + logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero ); + + //Integer Exponent: if x is normal or 
subnormal, return unbiased exponent of normalized double x + e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); + e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20); + + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); + lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); + + logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ), + spu_nor( isnan, spu_or( isinf, iszero ) ) ); + + return logb; +} diff --git a/Extras/simdmathlibrary/spu/logbf4.c b/Extras/simdmathlibrary/spu/logbf4.c new file mode 100644 index 000000000..85662a416 --- /dev/null +++ b/Extras/simdmathlibrary/spu/logbf4.c @@ -0,0 +1,46 @@ +/* logbf4 - for each element of vector x, return the exponent of x' as floating point value + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#ifndef HUGE_VALF +#define HUGE_VALF __builtin_huge_valf () +#endif + +vector float +logbf4 (vector float x) +{ + vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000)); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0); + e1 = spu_sub(e1, spu_splats((int)0x3F800000)); + return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask); +} + diff --git a/Extras/simdmathlibrary/spu/logf4.c b/Extras/simdmathlibrary/spu/logf4.c new file mode 100644 index 000000000..6e7f03d27 --- /dev/null +++ b/Extras/simdmathlibrary/spu/logf4.c @@ -0,0 +1,70 @@ +/* logf4 - for each of four slots, calculate the natural log + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +#define _LOGF_H_ln2msb ((float)(0.6931470632553f)) +#define _LOGF_H_ln2lsb ((float)(1.1730463525e-7f)) + +#define _LOGF_H_c0 ((float)(0.2988439998f)) +#define _LOGF_H_c1 ((float)(0.3997655209f)) +#define _LOGF_H_c2 ((float)(0.6666679125f)) + +vector float +logf4 (vector float x) +{ + vec_int4 zeros = spu_splats((int)0); + vec_float4 ones = spu_splats(1.0f); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + + vec_int4 expmask = spu_splats((int)0x7F800000); + vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + + + vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + x = spu_sel(x , spu_add(x, x) , mask); + xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); + + vec_float4 x1 = spu_sub(x , ones); + vec_float4 z = divf4 (x1, spu_add(x, ones)); + vec_float4 w = spu_mul(z , z); + vec_float4 polyw; + polyw = spu_madd(spu_splats(_LOGF_H_c0), w, spu_splats(_LOGF_H_c1)); + polyw = spu_madd(polyw , w, spu_splats(_LOGF_H_c2)); + + vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); + vec_float4 wnew = spu_convtf(xexp,0); + vec_float4 zz1 = spu_madd(spu_splats(_LOGF_H_ln2msb), wnew, x1); + vec_float4 zz2 = spu_madd(spu_splats(_LOGF_H_ln2lsb), wnew, yneg); + + return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); +} diff --git a/Extras/simdmathlibrary/spu/modfd2.c b/Extras/simdmathlibrary/spu/modfd2.c new file mode 100644 index 000000000..5c10df205 --- /dev/null +++ b/Extras/simdmathlibrary/spu/modfd2.c @@ -0,0 +1,54 @@ +/* modfd2 - for each of two double slots, compute fractional and integral parts. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +// Returns fractional part and stores integral part in *iptr. + +vector double +modfd2 (vector double x, vector double *iptr) +{ + vec_double2 integral, fraction; + vec_uint4 iszero; + vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0}; + vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}; + + integral = truncd2( x ); + + // if integral is zero, then fraction is x. 
+ iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0); + iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern)); + fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero); + + *iptr = integral; + return fraction; +} + diff --git a/Extras/simdmathlibrary/spu/modff4.c b/Extras/simdmathlibrary/spu/modff4.c new file mode 100644 index 000000000..3b28242c4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/modff4.c @@ -0,0 +1,47 @@ +/* modff4 - for each of four float slots, compute fractional and integral parts. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +// Returns fractional part and stores integral part in *iptr. + +vector float +modff4 (vector float x, vector float *iptr) +{ + vec_float4 integral, fraction; + + integral = truncf4( x ); + fraction = spu_sub( x, integral ); + + *iptr = integral; + return fraction; +} + diff --git a/Extras/simdmathlibrary/spu/nearbyintd2.c b/Extras/simdmathlibrary/spu/nearbyintd2.c new file mode 100644 index 000000000..ac5f90755 --- /dev/null +++ b/Extras/simdmathlibrary/spu/nearbyintd2.c @@ -0,0 +1,71 @@ +/* nearbyintd2 - Round the input to the nearest integer according to + the current rounding mode without raising an inexact exception. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +nearbyintd2(vector double in) +{ + vec_uint4 fpscr; + vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL}); + vec_double2 out, addend; + vec_uint4 vec_zero = ((vec_uint4){0,0,0,0}); + + fpscr = spu_mffpscr(); + + // check denormalized + vec_uint4 exp = spu_and( (vec_uint4)in, 0x7FF00000 ); + vec_uint4 is_denorm = spu_cmpeq( exp, 0 ); + vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm); + + // check zero + vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1})); + vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero); + is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}))); + ofs = spu_sel( ofs, vec_zero, is_zerox); + + vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs ); + + /* Add 2^53 and then subtract 2^53 to affect a round to be performed by the + * hardware. Also preserve the input sign so that negative inputs that + * round to zero generate a -0.0. 
+ */ + vec_uint4 is_large = spu_cmpgt( exp, 0x43200000 ); + is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8})); + addend = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large); + addend = spu_sel(addend, xx, sign); + + out = spu_sel(spu_sub(spu_add(xx, addend), addend), xx, sign); + + spu_mtfpscr(fpscr); + + return (out); +} diff --git a/Extras/simdmathlibrary/spu/nearbyintf4.c b/Extras/simdmathlibrary/spu/nearbyintf4.c new file mode 100644 index 000000000..9770a8ec6 --- /dev/null +++ b/Extras/simdmathlibrary/spu/nearbyintf4.c @@ -0,0 +1,50 @@ +/* nearbyintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding model, + without raising an inexact floating-point exception. + On SPU, the rounding mode for float is always towards zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+vector float nearbyintf4(vector float x)
+{
+ vector signed int xi;
+ vector unsigned int inrange;
+
+ // Can convert to and from signed integer to truncate values in range [-2^31, 2^31).
+ // However, no truncation needed if exponent > 22.
+
+ inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
+
+ xi = spu_convts( x, 0 );
+
+ return spu_sel( x, spu_convtf( xi, 0 ), inrange );
+}
diff --git a/Extras/simdmathlibrary/spu/negated2.c b/Extras/simdmathlibrary/spu/negated2.c
new file mode 100644
index 000000000..801dddfa8
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/negated2.c
@@ -0,0 +1,38 @@
+/* negated2 - for each of two double slots, negate the sign bit.
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms,
+ with or without modification, are permitted provided that the
+ following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +negated2 (vector double x) +{ + return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); +} + diff --git a/Extras/simdmathlibrary/spu/negatef4.c b/Extras/simdmathlibrary/spu/negatef4.c new file mode 100644 index 000000000..f9d5b6cb4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/negatef4.c @@ -0,0 +1,38 @@ +/* negatef4 - for each of four float slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector float negatef4 (vector float x) +{ + return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) ); +} + diff --git a/Extras/simdmathlibrary/spu/negatei4.c b/Extras/simdmathlibrary/spu/negatei4.c new file mode 100644 index 000000000..f74232a39 --- /dev/null +++ b/Extras/simdmathlibrary/spu/negatei4.c @@ -0,0 +1,39 @@ +/* negatei4 - for each of 4 signed int slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector signed int +negatei4 (vector signed int x) +{ + vector signed int zero = (vector signed int){0,0,0,0}; + return spu_sub (zero, x); +} + diff --git a/Extras/simdmathlibrary/spu/negatell2.c b/Extras/simdmathlibrary/spu/negatell2.c new file mode 100644 index 000000000..f3fdb5603 --- /dev/null +++ b/Extras/simdmathlibrary/spu/negatell2.c @@ -0,0 +1,43 @@ +/* negatell2 - for each of 2 signed long long slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+vector signed long long
+negatell2 (vector signed long long x)
+{
+ vector signed int zero = (vector signed int){0,0,0,0};
+ vector signed int borrow;
+
+ borrow = spu_genb(zero, (vec_int4)x);
+ borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0}));
+ return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
+}
+
diff --git a/Extras/simdmathlibrary/spu/nextafterd2.c b/Extras/simdmathlibrary/spu/nextafterd2.c
new file mode 100644
index 000000000..de43dd4ec
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/nextafterd2.c
@@ -0,0 +1,92 @@
+/* nextafterd2 - find next representable floating-point value towards 2nd param.
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms,
+ with or without modification, are permitted provided that the
+ following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Sony Computer Entertainment Inc nor the names
+ of its contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +nextafterd2 (vector double xx, vector double yy) +{ + vec_uint4 abs_x, abs_y, sign_x, abs_dif; + vec_uint4 is_sub, is_zerox, is_zeroy; + vec_uint4 is_equal, is_infy, is_nany; + vec_uint4 res0, res1, res; + vec_uint4 vec_zero = ((vec_uint4){0,0,0,0}); + vec_uint4 vec_one = ((vec_uint4){0,1,0,1}); + vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1}); + vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0}); + vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}); + vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}); + + // mask sign bit + abs_x = spu_and( (vec_uint4)xx, msk_abs); + abs_y = spu_and( (vec_uint4)yy, msk_abs); + + is_zerox = spu_cmpeq( abs_x, vec_zero); + is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq)); + + // -0 exception + sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0})); + sign_x = spu_sel(sign_x, vec_zero, is_zerox); + + // if same sign |y| < |x| -> decrease + abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4)); + is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease + is_sub = spu_or(is_sub, abs_dif); + is_sub = spu_rlmaska(is_sub, -31); + is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8})); + + res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase + res1 = spu_subx( abs_x, 
vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease + res = spu_sel( res0, res1, is_sub); // select increase or decrease + res = spu_or( res, sign_x); // set sign + + // check exception + // 0 -> -1 + res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub)); + + // check equal (include 0,-0) + is_zeroy = spu_cmpeq( abs_y, vec_zero); + is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq)); + is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy); + is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq)); + is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox)); + res = spu_sel(res, (vec_uint4)yy, is_equal); + + // check nan + is_infy = spu_cmpeq( abs_y, msk_exp); + is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq)); + is_nany = spu_and( abs_y, msk_exp); + is_nany = spu_cmpeq( is_nany, msk_exp); + is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq)); + is_nany = spu_sel( is_nany, vec_zero, is_infy); + res = spu_sel(res, (vec_uint4)yy, is_nany); + + return (vec_double2)res; +} diff --git a/Extras/simdmathlibrary/spu/nextafterf4.c b/Extras/simdmathlibrary/spu/nextafterf4.c new file mode 100644 index 000000000..3807d48d4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/nextafterf4.c @@ -0,0 +1,66 @@ +/* nextafterf4 - for each of four float slots, + return the the next representable value after x in the direction fo y, + if x is euqal to y, the result is y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +vector float nextafterf4(vector float x, vector float y) +{ + vec_float4 x_not_dec, lala_inc, lala_dec; + vec_uint4 abs_inc_number, abs_dec_number; + + vec_uint4 A, B; + + //abs_inc, abs_dec + abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats((float)0x0))); + abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats((float)0x0)); + + //x<= y + A= spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000)); + // in < 0 + B= abs_dec_number; + + lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats((float)0x0), x)); + + // in <=0, abs_inc ( if in==0, set result's sign to -) + //A= spu_or(spu_splats((unsigned int)0x80000000), spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000))); + A= spu_or(abs_inc_number, spu_splats((unsigned int)0x80000000)); + // in > 0 + B = abs_dec_number; + lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats((float)0x0))); + + + x_not_dec = spu_sel(y, lala_inc, spu_cmpgt(y, x)); + + // (x <= y) || (x > y) + return spu_sel(x_not_dec, lala_dec, spu_cmpgt(x, y)); +} diff --git a/Extras/simdmathlibrary/spu/powf4.c b/Extras/simdmathlibrary/spu/powf4.c new file mode 100644 index 000000000..98c57a131 --- /dev/null +++ b/Extras/simdmathlibrary/spu/powf4.c @@ -0,0 +1,72 @@ +/* powf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + +vector float +powf4 (vector float x, vector float y) +{ + vec_int4 zeros = spu_splats((int)0); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x); + + vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); + + vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); + vec_float4 absx = spu_andc(x, sbit); + vec_float4 absy = spu_andc(y, sbit); + vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001)); + negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros)); + + vec_float4 res = exp2f4(spu_mul(y, log2f4(absx))); + res = spu_sel(res, spu_or(sbit, res), negmask); + + + return spu_sel(res, (vec_float4)zeros, zeromask); +} + +/* +{ + vec_int4 zeros = spu_splats(0); + vec_int4 ones = (vec_int4)spu_splats((char)0xFF); + vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x); + vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y); + vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); + vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); + vec_float4 absx = spu_andc(x, sbit); + vec_float4 absy = spu_andc(y, sbit); + vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001)); + negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros)); + + + +} + +*/ diff --git a/Extras/simdmathlibrary/spu/recipd2.c b/Extras/simdmathlibrary/spu/recipd2.c new file mode 100644 index 000000000..3a6ef771b --- /dev/null +++ b/Extras/simdmathlibrary/spu/recipd2.c @@ -0,0 +1,80 @@ +/* recipd2 - for each of two double slots, compute reciprocal. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// Handles exceptional values as follows: +// NaN -> NaN +// (+,-)Inf -> (+,-)0 +// (+,-)0 -> (+,-)Inf +// Denormal inputs are treated as zero. + +vector double +recipd2 (vector double x) +{ + vec_ullong2 expmask, signmask; + vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result; + vec_float4 onef, manf, y0f, y1f; + + expmask = spu_splats(0x7ff0000000000000ull); + signmask = spu_splats(0x8000000000000000ull); + onef = spu_splats(1.0f); + one = spu_extend( onef ); + + // Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ). + // Invert exponent part with subtraction. 
+ + exp = spu_and( x, (vec_double2)expmask ); + nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp ); + man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask ); + + // Compute mantissa part with single and double precision Newton-Raphson steps. + // Then multiply with 2^(1-exponent). + + manf = spu_roundtf( man ); + y0f = spu_re( manf ); + y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f ); + y1 = spu_extend( y1f ); + y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 ); + y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 ); + y3 = spu_mul( y3, nexp ); + + // Choose iterated result or special value. + + zero = spu_and( x, (vec_double2)signmask ); + inf = spu_sel( (vec_double2)expmask, x, signmask ); + + result = spu_sel( y3, zero, isinfd2 ( x ) ); + result = spu_sel( result, inf, is0denormd2 ( x ) ); + result = spu_sel( result, x, isnand2( x ) ); + + return result; +} + diff --git a/Extras/simdmathlibrary/spu/recipf4.c b/Extras/simdmathlibrary/spu/recipf4.c new file mode 100644 index 000000000..c0337414d --- /dev/null +++ b/Extras/simdmathlibrary/spu/recipf4.c @@ -0,0 +1,45 @@ +/* recipf4 - for each of four float slots, compute reciprocal. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float recipf4 (vector float x) +{ + // Reciprocal estimate and 1 Newton-Raphson iteration. + // A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact + // answers for powers of 2, and a slightly smaller relative error bound. + + vec_float4 y0; + vec_float4 oneish = (vec_float4)spu_splats(0x3f800001); + + y0 = spu_re( x ); + return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 ); +} + diff --git a/Extras/simdmathlibrary/spu/remainderd2.c b/Extras/simdmathlibrary/spu/remainderd2.c new file mode 100644 index 000000000..e44bb8868 --- /dev/null +++ b/Extras/simdmathlibrary/spu/remainderd2.c @@ -0,0 +1,313 @@ +/* A vector double is returned that contains the remainder xi REM yi, + for the corresponding elements of vector double x and vector double y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _twice(vec_uint4 aa); + +vector double +remainderd2(vector double x, vector double yy) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_uint4 y_hi; + vec_uint4 abs_x, abs_yy, abs_2x, abs_2y; + vec_uint4 bias; + vec_uint4 nan_out, overflow; + vec_uint4 result; + vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); + vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); + vec_uint4 vec_zero = spu_splats((unsigned int)0); + vec_uint4 is_zeroy; + + // cut sign + abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_yy = spu_andc((vec_uint4)yy, sign_mask); + y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); + + + // check nan out + is_zeroy = spu_cmpeq(abs_yy, vec_zero); + is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); + nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 + nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 + nan_out = spu_or(nan_out, is_zeroy); // y = 0 + nan_out = spu_shuffle(nan_out, nan_out, splat_hi); + + + // make y x2 + abs_2y = _twice(abs_yy); // 2 x y + + /* + * use fmodd2 function + */ + // get remainder of y x2 +// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y); + { + vec_double2 y = (vec_double2)abs_2y; + + int shiftx0, shiftx1, shifty0, shifty1; + vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}); + vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192}); +// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 n, shift; + vec_uint4 
exp_x, exp_y; +// , sign; +// vec_uint4 abs_x, abs_y; + vec_uint4 abs_y; + vec_uint4 mant_x, mant_x0, mant_x1; + vec_uint4 mant_y, mant_y0, mant_y1; + vec_uint4 mant_0, mant_1; + vec_uint4 mant_r, mant_l; +// vec_uint4 result; + vec_uint4 result0, resultx; + vec_uint4 zero_x, zero_y; + vec_uint4 denorm_x, denorm_y; + vec_uint4 cnt, cnt_x, cnt_y; + vec_uint4 shift_x, shift_y; + vec_uint4 adj_x, adj_y; + vec_uint4 z, borrow, mask; + vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL)); +// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL)); + vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)); +// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1}); +// vec_uint4 vec_zero = spu_splats((unsigned int)0); + +// sign = spu_and( (vec_uint4)x, sign_mask); +// abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_y = spu_andc((vec_uint4)y, sign_mask); + exp_x = spu_rlmask(abs_x, -20); + exp_y = spu_rlmask(abs_y, -20); + // get shift count for denorm + cnt_x = spu_cntlz(abs_x); + cnt_y = spu_cntlz(abs_y); + cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32))); + cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32))); + + zero_x = spu_cmpgt(cnt_x, 63); // zero ? + zero_y = spu_cmpgt(cnt_y, 63); // zero ? 
+ result0 = spu_or(zero_x, zero_y); + result0 = spu_shuffle(result0, result0, splat_hi); + + // 0 - (cnt_x - 11) = 11 - cnt_x + shift_x= spu_add(cnt_x, -11); + shift_y= spu_add(cnt_y, -11); + cnt_x = spu_sub(11, cnt_x); + cnt_y = spu_sub(11, cnt_y); + + // count to normalize + adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0)); + adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0)); + adj_x = spu_shuffle(adj_x, adj_x, splat_hi); + adj_y = spu_shuffle(adj_y, adj_y, splat_hi); + + // for denorm + shiftx0 = spu_extract(shift_x, 0); + shiftx1 = spu_extract(shift_x, 2); + shifty0 = spu_extract(shift_y, 0); + shifty1 = spu_extract(shift_y, 2); + mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0); + mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0); + mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1); + mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1); + mant_x = spu_sel(mant_x0, mant_x1, merge_sel); + mant_y = spu_sel(mant_y0, mant_y1, merge_sel); + + denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x); + denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y); + mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x); + mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y); + mant_x = spu_or(mant_x, implied_1); // hidden bit + mant_y = spu_or(mant_y, implied_1); // hidden bit + + // x < y ? 
+ resultx = _vec_gt64(abs_y, abs_x); + + n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y); + mask = spu_cmpgt(n, 0); + mask = spu_andc(mask, resultx); + + while (spu_extract(spu_gather(mask), 0)) { + borrow = spu_genb(mant_x, mant_y); + borrow = spu_shuffle(borrow, borrow, propagate); + z = spu_subx(mant_x, mant_y, borrow); + + result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0); + + mant_x = spu_sel(mant_x, + spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)), + mask); + + n = spu_add(n, -1); + mask = spu_cmpgt(n, 0); + } + + borrow = spu_genb(mant_x, mant_y); + borrow = spu_shuffle(borrow, borrow, propagate); + z = spu_subx(mant_x, mant_y, borrow); + mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)); + result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0); + + // bring back to original range + mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0})); + mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1})); + + // for adj_y < 0 exp max=1 + shiftx0 = spu_extract(adj_y, 0); + shiftx1 = spu_extract(adj_y, 2); + mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0); + mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1); + mant_r = spu_sel(mant_x0, mant_x1, merge_sel); + + // for adj_y >= 0 + cnt = spu_cntlz(mant_x); + cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32))); + cnt = spu_add(cnt, -11); + cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1 + shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y)); + shiftx0 = spu_extract(shift, 0); + shiftx1 = spu_extract(shift, 2); + mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0); + mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1); + mant_l = spu_sel(mant_x0, mant_x1, merge_sel); + cnt = spu_sub(adj_y, (vec_uint4)shift); + mant_l 
= spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask)); + + result = spu_sel(mant_l, mant_r, denorm_y); + result = spu_sel(result, vec_zero, result0); // reminder 0 + result = spu_sel(result, abs_x, resultx); // x < y +// result = spu_xor(result, sign); // set sign + +// return ((vec_double2)result); + } + + +// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF))); + + /* if (2*x > y) + * x -= y + * if (2*x >= y) x -= y + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); + // make x2 + abs_2x = _twice(abs_x); // 2 x x + + bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy + bias = spu_andc(bias, overflow); + + abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); + + + overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minous + + // make x2 + abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x unsupport minous + bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31)); + bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); + abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); + + /* select final answer + */ + result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign + result = spu_sel(result, val_nan, nan_out); // if nan + + return ((vec_double2)result); +} + +/* + * subtraction function in limited confdition + */ +static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb) +{ + // which is bigger input aa or bb + vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa + + // need denorm calc ? 
+ vec_uint4 norm_a, norm_b; + norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_a = spu_and(norm_a, norm_b); + norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // calc (aa - bb) and (bb - aa) + vec_uint4 res_a, res_b, res; + vec_uint4 borrow_a, borrow_b; + vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192}); + borrow_a = spu_genb(aa, bb); + borrow_b = spu_genb(bb, aa); + borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b); + borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b); + res_a = spu_subx(aa, bb, borrow_a); + res_b = spu_subx(bb, aa, borrow_b); + res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign + + res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa) + // select normal calc or special + res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a); + + return res; +} + + +/* + * extend spu_cmpgt function to 64bit data + */ +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb + vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb + return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right +} +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right + return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); +} + +/* + * double formated x2 + */ +static inline vec_uint4 _twice(vec_uint4 aa) +{ + vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0 + norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // if denorm or zero << 1 , if norm exp + 1 + return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2 +} 
diff --git a/Extras/simdmathlibrary/spu/remainderf4.c b/Extras/simdmathlibrary/spu/remainderf4.c new file mode 100644 index 000000000..ffbea65c1 --- /dev/null +++ b/Extras/simdmathlibrary/spu/remainderf4.c @@ -0,0 +1,107 @@ +/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +// +// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. +// If x == 0, the result is 0. +// If x != 0 and y == 0, the result is undefined. +vector float +remainderf4 (vector float x, vector float y) +{ + vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf; + vec_int4 qi0, qi1, qi2; + vec_float4 i0, i1, i2, i, rem; + vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd; + + // Find i = truncated_integer(|x/y|) + + // By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0. + // Thus the exact truncation is either the truncated quotient, one less, or one greater. + + q = divf4( x, y ); + xabs = fabsf4( x ); + yabs = fabsf4( y ); + qabs = fabsf4( q ); + xabs2 = spu_add( xabs, xabs ); + + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); + inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); + + qi1 = spu_convts( qabs, 0 ); + qi0 = spu_add( qi1, -1 ); + qi2 = spu_add( qi1, 1 ); + + odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 ); + odd0 = odd2 = spu_nor( odd1, odd1 ); + + i0 = spu_convtf( qi0, 0 ); + i1 = spu_convtf( qi1, 0 ); + i2 = spu_convtf( qi2, 0 ); + + // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as + // 2*|x| - i*|y| >= |x|: + // + // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. + // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed + // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. 
+ + cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) ); + cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) ); + + i = i0; + i = spu_sel( i1, i, cmp1 ); + i = spu_sel( i2, i, cmp2 ); + + odd = odd0; + odd = spu_sel( odd1, odd, cmp1 ); + odd = spu_sel( odd2, odd, cmp2 ); + + rem = spu_nmsub( i, yabs, xabs ); + + // Test whether i or i+1 = nearest_integer(|x/y|) + // + // i+1 is correct if: + // + // rem > 0.5*|y| + // or + // rem = 0.5*|y| and i is odd + + yabshalf = spu_mul( yabs, spu_splats(0.5f) ); + cmp1 = spu_cmpgt( rem, yabshalf ); + cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd ); + + i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) ); + i = copysignf4( i, q ); + + return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); +} + diff --git a/Extras/simdmathlibrary/spu/remquod2.c b/Extras/simdmathlibrary/spu/remquod2.c new file mode 100644 index 000000000..745fd2931 --- /dev/null +++ b/Extras/simdmathlibrary/spu/remquod2.c @@ -0,0 +1,356 @@ +/* remquod2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +/* + * This function returns the same vector double result as remainderd2(). + * In addition a vector signed long long is storedin *pquo, + * that contains the corresponding element values whose sign is + * the sign of xi / yi and whose magnitude is congruent modulo 2n to + * the magnitude of the integral quotient of xi / yi, where n is + * an implementation-defined integer greater than or equal to 3. 
+ */ + +static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb); +static inline vec_uint4 _twice(vec_uint4 aa); + +vector double +remquod2(vector double x, vector double yy, vector signed long long *quo) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 quotient, quotient0; + vec_uint4 y_hi; + vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y; + vec_uint4 bias; + vec_uint4 nan_out, not_ge, quo_pos, overflow; + vec_uint4 result; + vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); + vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); + vec_uint4 vec_zero = spu_splats((unsigned int)0); + vec_uint4 is_zeroy; + + // cut sign + abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_yy = spu_andc((vec_uint4)yy, sign_mask); + y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); + + quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1); + quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi); + + // check nan out + is_zeroy = spu_cmpeq(abs_yy, vec_zero); + is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); + nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 + nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 + nan_out = spu_or(nan_out, is_zeroy); // y = 0 + nan_out = spu_shuffle(nan_out, nan_out, splat_hi); + + + // make y x8 + abs_2y = _twice(abs_yy); // 2 x y + abs_4y = _twice(abs_2y); // 4 x y + abs_8y = _twice(abs_4y); // 2 x y + + /* + * use fmodd2 function + */ + // get remainder of y x8 +// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y); + { + vec_double2 y = (vec_double2)abs_8y; + + int shiftx0, shiftx1, shifty0, shifty1; + vec_uchar16 swap_words = 
((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}); + vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192}); +// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 n, shift; + vec_uint4 exp_x, exp_y; +// , sign; +// vec_uint4 abs_x, abs_y; + vec_uint4 abs_y; + vec_uint4 mant_x, mant_x0, mant_x1; + vec_uint4 mant_y, mant_y0, mant_y1; + vec_uint4 mant_0, mant_1; + vec_uint4 mant_r, mant_l; +// vec_uint4 result; + vec_uint4 result0, resultx; + vec_uint4 zero_x, zero_y; + vec_uint4 denorm_x, denorm_y; + vec_uint4 cnt, cnt_x, cnt_y; + vec_uint4 shift_x, shift_y; + vec_uint4 adj_x, adj_y; + vec_uint4 z, borrow, mask; + vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL)); +// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL)); + vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)); +// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1}); +// vec_uint4 vec_zero = spu_splats((unsigned int)0); + +// sign = spu_and( (vec_uint4)x, sign_mask); +// abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_y = spu_andc((vec_uint4)y, sign_mask); + exp_x = spu_rlmask(abs_x, -20); + exp_y = spu_rlmask(abs_y, -20); + // get shift count for denorm + cnt_x = spu_cntlz(abs_x); + cnt_y = spu_cntlz(abs_y); + cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32))); + cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32))); + + zero_x = spu_cmpgt(cnt_x, 63); // zero ? + zero_y = spu_cmpgt(cnt_y, 63); // zero ? 
+ result0 = spu_or(zero_x, zero_y); + result0 = spu_shuffle(result0, result0, splat_hi); + + // 0 - (cnt_x - 11) = 11 - cnt_x + shift_x= spu_add(cnt_x, -11); + shift_y= spu_add(cnt_y, -11); + cnt_x = spu_sub(11, cnt_x); + cnt_y = spu_sub(11, cnt_y); + + // count to normalize + adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0)); + adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0)); + adj_x = spu_shuffle(adj_x, adj_x, splat_hi); + adj_y = spu_shuffle(adj_y, adj_y, splat_hi); + + // for denorm + shiftx0 = spu_extract(shift_x, 0); + shiftx1 = spu_extract(shift_x, 2); + shifty0 = spu_extract(shift_y, 0); + shifty1 = spu_extract(shift_y, 2); + mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0); + mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0); + mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1); + mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1); + mant_x = spu_sel(mant_x0, mant_x1, merge_sel); + mant_y = spu_sel(mant_y0, mant_y1, merge_sel); + + denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x); + denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y); + mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x); + mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y); + mant_x = spu_or(mant_x, implied_1); // hidden bit + mant_y = spu_or(mant_y, implied_1); // hidden bit + + // x < y ? 
+ resultx = _vec_gt64(abs_y, abs_x); + + n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y); + mask = spu_cmpgt(n, 0); + mask = spu_andc(mask, resultx); + + while (spu_extract(spu_gather(mask), 0)) { + borrow = spu_genb(mant_x, mant_y); + borrow = spu_shuffle(borrow, borrow, propagate); + z = spu_subx(mant_x, mant_y, borrow); + + result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0); + + mant_x = spu_sel(mant_x, + spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)), + mask); + + n = spu_add(n, -1); + mask = spu_cmpgt(n, 0); + } + + borrow = spu_genb(mant_x, mant_y); + borrow = spu_shuffle(borrow, borrow, propagate); + z = spu_subx(mant_x, mant_y, borrow); + mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)); + result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0); + + // bring back to original range + mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0})); + mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1})); + + // for adj_y < 0 exp max=1 + shiftx0 = spu_extract(adj_y, 0); + shiftx1 = spu_extract(adj_y, 2); + mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0); + mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1); + mant_r = spu_sel(mant_x0, mant_x1, merge_sel); + + // for adj_y >= 0 + cnt = spu_cntlz(mant_x); + cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32))); + cnt = spu_add(cnt, -11); + cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1 + shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y)); + shiftx0 = spu_extract(shift, 0); + shiftx1 = spu_extract(shift, 2); + mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0); + mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1); + mant_l = spu_sel(mant_x0, mant_x1, merge_sel); + cnt = spu_sub(adj_y, (vec_uint4)shift); + mant_l 
= spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask)); + + result = spu_sel(mant_l, mant_r, denorm_y); + result = spu_sel(result, vec_zero, result0); // reminder 0 + result = spu_sel(result, abs_x, resultx); // x < y +// result = spu_xor(result, sign); // set sign + +// return ((vec_double2)result); + } + + // if y (x8->exp+3 7FF-7FC) overflow +// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + + /* if (x >= 4*y) + * x -= 4*y + * quotient = 4 + * else + * quotient = 0 + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF)); + + not_ge = _vec_gt64(abs_4y, abs_x); + not_ge = spu_or(not_ge, overflow); + abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge); + quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge); + + /* if (x >= 2*y + * x -= 2*y + * quotient += 2 + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF)); + + not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x + not_ge = spu_or(not_ge, overflow); + + abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge); + quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge); + + /* if (2*x > y) + * x -= y + * if (2*x >= y) x -= y + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); + // make x2 + abs_2x = _twice(abs_x); // 2 x x + + bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy + bias = spu_andc(bias, overflow); + + abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); + quotient = spu_sub(quotient, (vec_int4)bias); + + overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minous + + // make x2 + abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x unsupport minous + bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31)); + bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); + abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); + quotient = 
spu_sub(quotient, (vec_int4)bias); + + /* select final answer + */ + result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign + result = spu_sel(result, val_nan, nan_out); // if nan + + quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3bit + quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4)); + quotient = spu_sel(quotient0, quotient, quo_pos); + + *quo = (vec_llong2)quotient; + + return ((vec_double2)result); +} + +/* + * subtraction function in limited confdition + */ +static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb) +{ + // which is bigger input aa or bb + vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa + + // need denorm calc ? + vec_uint4 norm_a, norm_b; + norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_a = spu_and(norm_a, norm_b); + norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // calc (aa - bb) and (bb - aa) + vec_uint4 res_a, res_b, res; + vec_uint4 borrow_a, borrow_b; + vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192}); + borrow_a = spu_genb(aa, bb); + borrow_b = spu_genb(bb, aa); + borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b); + borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b); + res_a = spu_subx(aa, bb, borrow_a); + res_b = spu_subx(bb, aa, borrow_b); + res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign + + res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa) + // select normal calc or special + res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a); + + return res; +} + + +/* + * extend spu_cmpgt function to 64bit data + */ +static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb + vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb + 
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right +} +static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right + return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); +} + +/* + * double formated x2 + */ +static inline vec_uint4 _twice(vec_uint4 aa) +{ + vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0 + norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // if denorm or zero << 1 , if norm exp + 1 + return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2 +} diff --git a/Extras/simdmathlibrary/spu/remquof4.c b/Extras/simdmathlibrary/spu/remquof4.c new file mode 100644 index 000000000..cd4eb7357 --- /dev/null +++ b/Extras/simdmathlibrary/spu/remquof4.c @@ -0,0 +1,189 @@ +/* remquof4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/* remquof4 - This function returns the same vector float result as + * remainderf4(). In addition a vector signed int is stored in + * *pquo, that contains the corresponding element values whose sign is + * the sign of xi / yi and whose magnitude is congruent modulo 2n to + * the magnitude of the integral quotient of xi / yi, where n is an + * implementation-defined integer + * greater than or equal to 3. 
+ */ + + +#include +#include + +vector float +remquof4(vector float x, vector float y, vector signed int *quo) +{ + vec_int4 n; + vec_int4 quotient; + vec_uint4 z, y2, y4; + vec_uint4 abs_x, abs_y, abs_2x, abs_8y; + vec_uint4 exp_x, exp_y; + vec_uint4 zero_x, zero_y; +// vec_uint4 logb_x, logb_y; + vec_uint4 mant_x, mant_y; + vec_uint4 not_ge, overflow, quo_pos, mask; + vec_uint4 result, result0, resultx, cnt, sign, bias; + vec_uint4 sign_mask = spu_splats((unsigned int)0x80000000); + vec_uint4 implied_1 = spu_splats((unsigned int)0x00800000); + vec_uint4 mant_mask = spu_splats((unsigned int)0x007FFFFF); + + abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_y = spu_andc((vec_uint4)y, sign_mask); + + abs_8y = spu_add(abs_y, spu_splats((unsigned int)0x01800000)); /* abs_8y = (2^3) * abs_y */ + + sign = spu_and((vec_uint4)x, sign_mask); + + quo_pos = spu_cmpgt((vec_int4)spu_and(spu_xor((vec_uint4)x, (vec_uint4)y), sign_mask), -1); + + /* Compute abs_x = fmodf(abs_x, 8*abs_y). If y is greater than 0.125*SMAX + * (SMAX is the maximum representable float), then return abs_x. + */ + { + /* Determine ilogb of abs_x and abs_8y and + * extract the mantissas (mant_x, mant_y) + */ + exp_x = spu_rlmask(abs_x, -23); + exp_y = spu_rlmask(abs_8y, -23); + + resultx = spu_or(spu_cmpgt(abs_8y, abs_x), spu_cmpgt(abs_y, spu_splats((unsigned int)0x7E7FFFFF))); + + zero_x = spu_cmpeq(exp_x, 0); +// zero_y = spu_cmpeq(exp_y, 0); + zero_y = spu_cmpgt(implied_1, abs_y ); + +// logb_x = spu_add(exp_x, -127); +// logb_y = spu_add(exp_y, -127); + + mant_x = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x); + mant_y = spu_andc(spu_sel(implied_1, abs_8y, mant_mask), zero_y); + + /* Compute fixed point fmod of mant_x and mant_y. Set the flag, + * result0, to all ones if we detect that the final result is + * ever 0. 
+ */ + result0 = spu_or(zero_x, zero_y); + +// n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); + n = spu_sub((vec_int4)exp_x, (vec_int4)exp_y); // (exp_x-127)-(exp_y-127)=exp_x-exp_y + mask = spu_cmpgt(n, 0); + + while (spu_extract(spu_gather(mask), 0)) { + z = spu_sub(mant_x, mant_y); + + result0 = spu_or(spu_and(spu_cmpeq(z, 0), mask), result0); + + mant_x = spu_sel(mant_x, + spu_sel(spu_add(mant_x, mant_x), + spu_add(z, z), + spu_cmpgt((vec_int4)z, -1)), + mask); + + n = spu_add(n, -1); + mask = spu_cmpgt(n, 0); + } + + z = spu_sub(mant_x, mant_y); + mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)z, -1)); + + result0 = spu_or(spu_cmpeq(mant_x, 0), result0); + + /* Convert the result back to floating point and restore + * the sign. If we flagged the result to be zero (result0), + * zero it. If we flagged the result to equal its input x, + * (resultx) then return x. + */ + cnt = spu_add(spu_cntlz(mant_x), -8); + + // hide hidden bit and shift left side zero + mant_x = spu_rl(spu_andc(mant_x, implied_1), (vec_int4)cnt); + + exp_y = spu_sub(exp_y, cnt); //adjust exponent + result0 = spu_orc(result0, spu_cmpgt((vec_int4)exp_y, 0)); /* zero denorm results */ + exp_y = spu_rl(exp_y, 23); + + result = spu_sel(exp_y, mant_x, mant_mask); + abs_x = spu_sel(spu_andc(result, spu_rlmask(result0, -1)), abs_x, resultx); + result0 = spu_andc(result0, resultx); + } + + /* if (x >= 4*y) + * x -= 4*y + * quotient = 4 + * else + * quotient = 0 + */ + y4 = spu_andc(spu_add(abs_y, spu_splats((unsigned int)0x01000000)), zero_y); + + overflow = spu_cmpgt(abs_y, spu_splats((unsigned int)0x7EFFFFFF)); + not_ge = spu_or(spu_cmpgt(y4, abs_x), overflow); + + abs_x = spu_sel((vec_uint4)spu_sub((vec_float4)abs_x, (vec_float4)y4), abs_x, not_ge); + quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge); + + /* if (x >= 2*y + * x -= 2*y + * quotient += 2 + */ + y2 = spu_andc(spu_add(abs_y, implied_1), zero_y); + not_ge = spu_cmpgt(y2, abs_x); + + abs_x = 
spu_sel((vec_uint4)spu_sub((vec_float4)abs_x, (vec_float4)y2), abs_x, not_ge); + quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge); + + /* if (2*x > y) + * x -= y + * if (2*x >= y) x -= y + */ + abs_2x = spu_add(abs_x, implied_1); + bias = spu_cmpgt(abs_2x, abs_y); + abs_x = spu_sel(abs_x, (vec_uint4)spu_sub((vec_float4)abs_x, (vec_float4)abs_y), bias); + quotient = spu_sub(quotient, (vec_int4)bias); + + bias = spu_andc(bias, spu_rlmaska((vec_uint4)spu_msub((vec_float4)abs_x, spu_splats(2.0f), (vec_float4)abs_y), -31)); + abs_x = spu_sel(abs_x, (vec_uint4)spu_sub((vec_float4)abs_x, (vec_float4)abs_y), bias); + quotient = spu_sub(quotient, (vec_int4)bias); + + /* Generate a correct final sign + */ + result = spu_sel(abs_x, ((vec_uint4){0,0,0,0}), result0); // reminder 0 + result = spu_xor(result, sign); // set sign + + quotient = spu_and(quotient, 7); + quotient = spu_sel(spu_sub(0, quotient), quotient, quo_pos); + + *quo = quotient; + + return ((vec_float4)result); +} + + diff --git a/Extras/simdmathlibrary/spu/rintd2.c b/Extras/simdmathlibrary/spu/rintd2.c new file mode 100644 index 000000000..ad733898c --- /dev/null +++ b/Extras/simdmathlibrary/spu/rintd2.c @@ -0,0 +1,67 @@ +/* rintd2 - Round the input to the nearest integer according to + the current rounding mode. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +rintd2(vector double in) +{ + vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL}); + vec_uint4 vec_norm = ((vec_uint4){0x00100000,0,0x00100000,0}); + vec_uint4 vec_bias = ((vec_uint4){0x43300000,0,0x43300000,0}); + vec_double2 addend, xx; + vec_uint4 abs_x; + vec_uint4 is_zerox; + vec_uint4 is_denorm; + vec_uint4 ofs; + + abs_x = spu_andc((vec_uint4)in, (vec_uint4)sign); + + // check denormalized + is_zerox = spu_cmpeq( abs_x, 0); + is_denorm = spu_cmpgt( vec_norm, abs_x ); + is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}))); + is_denorm = spu_andc(is_denorm, is_zerox); + ofs = spu_and( vec_norm, is_denorm); + + xx = spu_or( in, (vec_double2)ofs ); + + /* Add 2^53 and then subtract 2^53 to affect a round to be performed by the + * hardware. Also preserve the input sign so that negative inputs that + * round to zero generate a -0.0. 
+ */ + addend = (vec_double2)spu_and(vec_bias, spu_cmpgt( vec_bias, abs_x )); + addend = spu_sel(addend, in, sign); + + return spu_sel(spu_sub(spu_add(xx, addend), addend), in, sign); + + +} diff --git a/Extras/simdmathlibrary/spu/rintf4.c b/Extras/simdmathlibrary/spu/rintf4.c new file mode 100644 index 000000000..57ac9c64e --- /dev/null +++ b/Extras/simdmathlibrary/spu/rintf4.c @@ -0,0 +1,48 @@ +/* rintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding model. + On SPU, the rounding mode for float is always towards zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float rintf4(vector float x) +{ + vector signed int xi; + vector unsigned int inrange; + + // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). + // However, no truncation needed if exponent > 22. + + inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + + return spu_sel( x, spu_convtf( xi, 0 ), inrange ); +} diff --git a/Extras/simdmathlibrary/spu/roundd2.c b/Extras/simdmathlibrary/spu/roundd2.c new file mode 100644 index 000000000..24f072b27 --- /dev/null +++ b/Extras/simdmathlibrary/spu/roundd2.c @@ -0,0 +1,75 @@ +/* roundd2 - rounds to nearest integer value in floating point format. + 0.5 will be rounded to far from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + + +vector double +roundd2 (vector double in) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 exp, shift; + vec_uint4 sign = ((vec_uint4){ 0x80000000, 0, 0x80000000, 0}); + vec_uint4 or_mask, and_mask, mask, addend; + vec_double2 in_hi, out; + + /* Add 0.5 (fixed precision to eliminate rounding issues) + */ + in_hi = spu_shuffle(in, in, splat_hi); + exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF); + + shift = spu_sub(((vec_int4){ 1022, 1043, 1022, 1043}), exp); + + addend = spu_and(spu_rlmask(((vec_uint4){ 0x100000, 0x80000000, 0x100000, 0x80000000}), shift), + spu_cmpgt((vec_uint4)spu_add(shift, -1), -33)); + + in = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4)); + + /* Truncate the result. 
+ */ + in_hi = spu_shuffle(in, in, splat_hi); + exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF); + + shift = spu_sub(((vec_int4){ 1023, 1043, 1023, 1043}), exp); + or_mask = spu_andc(spu_cmpgt(shift, 0), sign); + + and_mask = spu_rlmask(((vec_uint4){ 0xFFFFF, -1, 0xFFFFF, -1}), shift); +// mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -31)), or_mask); + mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -32)), or_mask); + + /* Apply the mask and return the result. + */ + out = spu_andc(in, (vec_double2)(mask)); + + return (out); +} + + diff --git a/Extras/simdmathlibrary/spu/roundf4.c b/Extras/simdmathlibrary/spu/roundf4.c new file mode 100644 index 000000000..609c97897 --- /dev/null +++ b/Extras/simdmathlibrary/spu/roundf4.c @@ -0,0 +1,63 @@ +/* roundf4 - for each of four float slots, round to the nearest integer, + halfway cases are rounded away form zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float roundf4(vector float in) +{ + vec_int4 exp; + vec_uint4 or_mask, and_mask, mask, addend; + vec_float4 out; + + /* Add 0.5 (fixed precision to eliminate rounding issues + */ + exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF)); + + addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp), + spu_cmpgt((vec_uint4)exp, -31)); + + in = (vec_float4)spu_add((vec_uint4)in, addend); + + + /* Truncate the result. + */ + exp = spu_sub(127, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF)); + + or_mask = spu_cmpgt(exp, 0); + + and_mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp); + + mask = spu_or(spu_and(and_mask, spu_cmpgt(exp, -31)), or_mask); + + out = spu_andc(in, (vec_float4)(mask)); + + return (out); +} diff --git a/Extras/simdmathlibrary/spu/rsqrtd2.c b/Extras/simdmathlibrary/spu/rsqrtd2.c new file mode 100644 index 000000000..05b2599e3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/rsqrtd2.c @@ -0,0 +1,96 @@ +/* rsqrtd2 - for each of two double slots, compute reciprocal square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +// +// Handles exceptional values as follows: +// NaN -> NaN +// (+,-)0 -> (+,-)Inf +// +Inf -> +0 +// -Inf -> Nan +// -Finite -> Nan +// Denormal inputs are treated as zero. 
+ +vector double rsqrtd2 (vector double x) +{ + vec_ullong2 expmask, onemask, signmask, evenexp; + vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result; + vec_float4 halff, onef, manf, y0f, y1f; + + expmask = spu_splats(0x7ff0000000000000ull); + onemask = spu_splats(0x0010000000000000ull); + signmask = spu_splats(0x8000000000000000ull); + onef = spu_splats(1.0f); + one = spu_extend( onef ); + halff = spu_splats(0.5f); + half = spu_extend( halff ); + + // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) ) + // where i = 0 when exponent is even and i = 1 when exponent is odd. + // + // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2: + // + // biased_exp = 1023 + exponent + // new_biased_exp = 1023 - (exponent+i)/2 + // = 1023 - (biased_exp-1023+i)/2 + // = (3069 - (biased_exp+i)) / 2 + + evenexp = spu_and( (vec_ullong2)x, onemask ); + man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask ); + + exp = spu_and( x, (vec_double2)expmask ); + nexp = spu_or( exp, (vec_double2)onemask ); + nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 ); + + // Compute mantissa part in single precision. + // Convert back to double and multiply with 2^(-(exponent+i)/2), then + // do two Newton-Raphson steps for full precision. + + manf = spu_roundtf( man ); + y0f = spu_rsqrte( manf ); + y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f ); + y1 = spu_mul( spu_extend( y1f ), nexp ); + y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 ); + y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 ); + + // Choose iterated result or special value. 
+ + zero = spu_and( x, (vec_double2)signmask ); + inf = spu_sel( (vec_double2)expmask, x, signmask ); + nan = (vec_double2)spu_splats(0x7ff8000000000000ull); + + result = spu_sel( y3, zero, isinfd2 ( x ) ); + result = spu_sel( result, nan, signbitd2 ( x ) ); + result = spu_sel( result, inf, is0denormd2 ( x ) ); + + return result; +} + diff --git a/Extras/simdmathlibrary/spu/rsqrtf4.c b/Extras/simdmathlibrary/spu/rsqrtf4.c new file mode 100644 index 000000000..e4d6dc45e --- /dev/null +++ b/Extras/simdmathlibrary/spu/rsqrtf4.c @@ -0,0 +1,46 @@ +/* rsqrtf4 - for each of four float slots, compute reciprocal square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +// Undefined if input < 0. + +#include +#include + +vector float rsqrtf4 (vector float x) +{ + // Reciprocal square root estimate and 1 Newton-Raphson iteration. + + vec_float4 y0, y0x, y0half; + vec_float4 oneish = (vec_float4)spu_splats(0x3f800001); + + y0 = spu_rsqrte( x ); + y0x = spu_mul( y0, x ); + y0half = spu_mul( y0, spu_splats(0.5f) ); + return spu_madd( spu_nmsub( y0, y0x, oneish ), y0half, y0 ); +} + diff --git a/Extras/simdmathlibrary/spu/scalbllnd2.c b/Extras/simdmathlibrary/spu/scalbllnd2.c new file mode 100644 index 000000000..cb4e20961 --- /dev/null +++ b/Extras/simdmathlibrary/spu/scalbllnd2.c @@ -0,0 +1,265 @@ +/* scalbllnd2 - Multiply Double by 2 Raised to its Power + For large elements of ex (overflow), returns HUGE_VALF + For small elements of ex (underflow), returns 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#undef SCALBLLND2_ROUND + +#include +#include + +vector double +scalbllnd2(vector double x, vector signed long long ex) +{ + vec_int4 e1, e2; + vec_int4 min = spu_splats(-2099); +// vec_int4 min = spu_splats(-2044); + vec_int4 max = spu_splats( 2098); +// vec_int4 max = spu_splats( 2046); + vec_uint4 cmp_min, cmp_max; + vec_uint4 shift = ((vec_uint4){20, 32, 20, 32}); + vec_double2 f1, f2; + vec_double2 out; + vec_double2 in = x; + vec_int4 exp_in; + + // check input data range + vec_int4 exp0 = spu_shuffle( (vec_int4)ex, (vec_int4)ex, ((vec_uchar16){4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15})); + vec_int4 dmy = spu_shuffle( (vec_int4)spu_splats(0x10000), (vec_int4)ex, ((vec_uchar16){16,1,2,3, 16,1,2,3, 24,1,2,3,24,1,2,3})); + // (-)0xFFFFFFFF80000000 or (+)0x000000007FFFFFFF + vec_int4 msk_range = ((vec_int4){0,0x80000000, 0,0x80000000}); + vec_int4 inrange = spu_addx( (vec_int4)ex, msk_range, spu_rlqwbyte(spu_genc((vec_int4)ex, msk_range), 4)); + inrange = (vec_int4)spu_cmpeq( 
inrange, 0 ); + inrange = spu_shuffle(inrange,inrange,((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + // select dummy over ranged data or input data + vec_int4 exp = spu_sel( dmy, exp0, (vec_uint4)inrange); + exp_in = exp; + /* Clamp the specified exponent to the range -2044 to 2046. + */ + cmp_min = spu_cmpgt(exp, min); + cmp_max = spu_cmpgt(exp, max); + exp = spu_sel(min, exp, cmp_min); + exp = spu_sel(exp, max, cmp_max); + + /* Generate the factors f1 = 2^e1 and f2 = 2^e2 + */ + e1 = spu_rlmaska(exp, -1); + e2 = spu_sub(exp, e1); + + f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift); + + vec_double2 otmp = spu_mul(x, f1); + vec_uint4 fpscr1 = spu_mffpscr(); + + f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift); + + out = spu_mul(otmp, f2); + vec_uint4 fpscr2 = spu_mffpscr(); + + /* Compute the product x * 2^e1 * 2^e2 + */ +// out = spu_mul(spu_mul(x, f1), f2); + + // check floating point register DENORM bit + vec_uint4 fpscr0, fpscr; + fpscr0 = spu_or(fpscr1, fpscr2); + fpscr = spu_shuffle(fpscr0, fpscr0, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,10,0x80,0x80,0x80,6,0x80,0x80,0x80,0x80,0x80})); + fpscr = spu_or(fpscr0, fpscr); + if ( __builtin_expect(spu_extract(fpscr, 1) == 0, 1) ) return out; + + + ////////////////////// + // Denormalized calc// + ////////////////////// + + vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}; + vec_uint4 signmask = ((vec_uint4){0x80000000,0,0x80000000,0}); + vec_int4 zeros = spu_splats(0); + vec_uchar16 msk_64_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}); + + //check input was zero + vec_uint4 x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1})); + vec_uint4 x_zero = spu_cmpeq( x_body, (vec_uint4)zeros ); + x_zero = spu_and( x_zero, spu_shuffle(x_zero,x_zero,msk_64_eq)); + + // check Denormalized input + vec_int4 cnt_zero = (vec_int4)spu_cntlz(x_body); + vec_uint4 is_den = (vec_uint4)spu_cmpgt(cnt_zero, 11); // Denormalized data 000XXXXX XXXXXXXX + is_den = 
spu_shuffle( is_den, is_den, splat_msb); + is_den = spu_sel(is_den, (vec_uint4)zeros, x_zero); // exclude zero from denormalized + + // count 0bits for 64bit + vec_uint4 cnt_ex = (vec_uint4)spu_cmpgt(cnt_zero, 31); // Denormalized data 00000000 XXXXXXXX + vec_int4 cnt_z = spu_shuffle( cnt_zero, cnt_zero, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})); + cnt_zero = spu_add(cnt_zero, spu_sel(zeros, cnt_z, cnt_ex)); + cnt_zero = spu_shuffle(cnt_zero, cnt_zero, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + // extract each 64bit data + x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x000FFFFF,-1,0x000FFFFF,-1})); + vec_uint4 mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)exp_in, -31); + sign = spu_shuffle(sign, sign, splat_msb); + + // set max shift count + vec_int4 sht = spu_add( cnt_zero, ((vec_int4){-11,-64,-11,-64})); + + // denorm & exp+ shift left + vec_uint4 cmp = spu_cmpgt( sht, exp_in); + vec_int4 sht_l = spu_sel(sht, exp_in, cmp); + int shtl0 = spu_extract(sht_l, 0); + int shtl1 = spu_extract(sht_l, 2); + vec_uint4 mant0l = spu_slqwbytebc( spu_slqw(mant0, shtl0), shtl0 ); + vec_uint4 mant1l = spu_slqwbytebc( spu_slqw(mant1, shtl1), shtl1 ); + vec_int4 expp = spu_shuffle(spu_sub(exp_in, sht_l), zeros, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11})); + + exp0 = spu_sel( expp, exp_in, sign ); // select plus or minus caluc + vec_uint4 mantl = spu_shuffle( mant0l, mant1l, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23})); + vec_uint4 mant = spu_sel( mantl, (vec_uint4)x, sign); + exp = spu_sel( exp_in, exp0, is_den ); // select denormalized + x = (vec_double2)spu_sel( (vec_uint4)x, mant, is_den); + + + ////////////////////////////////////////////////////////////////////////// + // from 
ldexpf4 + vec_int4 expmask = ((vec_int4){0x7FF00000, 0, 0x7FF00000, 0}); + e1 = spu_and((vec_int4)x, expmask); + e2 = spu_rlmask(e1,-20); + + vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 2046); + vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-2044), exp); + minmask = spu_or (minmask, (vec_uchar16)x_zero); + + vec_int4 esum = spu_add(e2, exp); + + maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046)); + maxmask = spu_shuffle(maxmask, maxmask, splat_msb); +// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL))); + minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum)); + minmask = spu_shuffle(minmask, minmask, splat_msb); + + // check denorm + vec_uint4 mxmask = spu_and(spu_cmpgt(e2, 0), ((vec_uint4){0x00100000,0,0x00100000,0})); // not denorm + vec_int4 esum2 = spu_sub(esum, (vec_int4)spu_rlmask(mxmask, -20)); // reverse to norm + vec_uint4 mrange = spu_and(spu_cmpgt(zeros, esum2), spu_cmpgt(esum2, -55)); // denorm range + mrange = spu_shuffle(mrange, mrange, splat_msb); + + vec_int4 sht_r = spu_sel(spu_splats(-54), esum2, spu_cmpgt(esum2, spu_splats(-54)) ); + vec_int4 sht_rh = spu_add( sht_r, ((vec_int4){7,7,7,7})); + + x_body = spu_or( x_body, mxmask ); + mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) ); + vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) ); + +#ifdef SCALBLLND2_ROUND + // check current round mode + fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0})); + fpscr1 = spu_and(fpscr, ((vec_uint4){0,0x300,0,0})); + + // 
prepare round data + vec_uint4 rnd0 = spu_slqwbytebc( spu_slqw( mant0r, 31), 31); + vec_uint4 rnd1 = spu_slqwbytebc( spu_slqw( mant1r, 31), 31); + vec_uint4 rnd0w = (vec_uint4)spu_cntb( (vec_uchar16)rnd0 ); + vec_uint4 rnd1w = (vec_uint4)spu_cntb( (vec_uchar16)rnd1 ); + rnd0w = spu_or( spu_slqwbyte(rnd0w,4), spu_slqwbyte(rnd0w,8)); + rnd1w = spu_or( spu_slqwbyte(rnd1w,4), spu_slqwbyte(rnd1w,8)); + rnd0 = spu_or( rnd0, rnd0w); + rnd1 = spu_or( rnd1, rnd1w); + + // nearest + // check half + vec_uint4 hit0 = spu_cmpeq(rnd0, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out + vec_uint4 hit1 = spu_cmpeq(rnd1, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out + vec_uint4 add0 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit0); + vec_uint4 add1 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit1); + // check greater than half + rnd0 = spu_and( rnd0, ((vec_uint4){0,0x7FFFFFFF,0,0})); + rnd1 = spu_and( rnd1, ((vec_uint4){0,0x7FFFFFFF,0,0})); + hit0 = spu_cmpgt(rnd0, ((vec_uint4){0,0x40000000,0,0})); + hit1 = spu_cmpgt(rnd1, ((vec_uint4){0,0x40000000,0,0})); + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + // select if fp0 + add0 = spu_sel((vec_uint4)zeros, add0, spu_cmpeq(fpscr0, (vec_uint4)zeros)); + add1 = spu_sel((vec_uint4)zeros, add1, spu_cmpeq(fpscr1, (vec_uint4)zeros)); + + // toward zero do nothing + // upward + sign = spu_rlmaska((vec_uint4)in, -31); + vec_uint4 sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,0,0,0,0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 sign1 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,8,8,8,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80})); + vec_uint4 hit0w = spu_cmpgt(rnd0, ((vec_uint4){0,0,0,0})); + vec_uint4 hit1w = spu_cmpgt(rnd1, ((vec_uint4){0,0,0,0})); + + hit0 = spu_and(hit0w, spu_nor(sign0, sign0)); + hit1 = spu_and(hit1w, spu_nor(sign1, sign1)); + hit0 = spu_and(hit0, spu_cmpeq(fpscr0, 
((vec_uint4){0,0x800,0,0}))); + hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x200,0,0}))); + // select if fp2 + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + + // downward + hit0 = spu_and(hit0w, sign0); + hit1 = spu_and(hit1w, sign1); + hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0xc00,0,0}))); + hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x300,0,0}))); + // select if fp3 + add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0); + add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1); + + // calc round + mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4)); + mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4)); + +#endif // SCALBLLND2_ROUND + + vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23})); + + // select right answer + x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask); + x = spu_sel(x, (vec_double2)zeros, minmask); + x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask); + + out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange); + + // check Infinity,NaN + vec_uint4 is_inf = spu_cmpeq(e1, expmask); + is_inf = spu_and( is_inf, spu_shuffle(is_inf,is_inf,msk_64_eq)); + out = (vec_double2)spu_sel((vec_uint4)out , (vec_uint4)in, is_inf); + + out = spu_sel(out, in, (vec_ullong2)signmask); + return out; +} + + diff --git a/Extras/simdmathlibrary/spu/scalbnf4.c b/Extras/simdmathlibrary/spu/scalbnf4.c new file mode 100644 index 000000000..acc62adc5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/scalbnf4.c @@ -0,0 +1,72 @@ +/* scalbnf4 computes x * 2^exp. This function is computed without + the assistence of any floating point operations and as such does + not set any floating point exceptions. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector float +scalbnf4(vector float x, vector signed int n) +{ + vec_int4 x_exp; + vec_uint4 zero, overflow; + vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000); + vec_float4 out; + + /* Extract exponent from x. If the exponent is 0, then + * x is either 0 or a denorm and x*2^exp is a zero. + */ + x_exp = spu_and(spu_rlmask((vec_int4)x, -23), 0xFF); + + zero = spu_cmpeq(x_exp, 0); + + /* Compute the expected exponent and determine if the + * result is within range. 
+ */ + x_exp = spu_add(n, x_exp); + + zero = spu_orc(zero, spu_cmpgt(x_exp, 0)); + +// overflow = spu_rlmask(spu_cmpgt(x_exp, 255), -1); + overflow = spu_cmpgt(x_exp, 255); + + /* Merge the expect exponent with x's mantissa. Zero the + * result if underflow and force to max if overflow. + */ + out = spu_sel(x, (vec_float4)spu_rl(x_exp, 23), exp_mask); + out = spu_andc(out, (vec_float4)zero); + out = spu_or(out, (vec_float4)overflow); + // add sign bit + out = spu_sel(out, x, spu_splats((unsigned int)0x80000000)); + + return out; +} + + diff --git a/Extras/simdmathlibrary/spu/signbitd2.c b/Extras/simdmathlibrary/spu/signbitd2.c new file mode 100644 index 000000000..7978aa4a3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/signbitd2.c @@ -0,0 +1,43 @@ +/* signbitd2 - for each of two double slots, if input has negative sign bit return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned long long signbitd2 (vector double x) +{ + vec_ullong2 cmp; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + + cmp = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)x ); + cmp = spu_shuffle( cmp, cmp, even ); + + return cmp; +} + diff --git a/Extras/simdmathlibrary/spu/signbitf4.c b/Extras/simdmathlibrary/spu/signbitf4.c new file mode 100644 index 000000000..6ab9103a8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/signbitf4.c @@ -0,0 +1,38 @@ +/* signbitf4 - for each element of vector x, return a mask of ones if x' has signbit one, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector unsigned int +signbitf4 (vector float x) +{ + return spu_cmpgt( spu_splats(0), (vec_int4)x ); +} + diff --git a/Extras/simdmathlibrary/spu/sincos_c.h b/Extras/simdmathlibrary/spu/sincos_c.h new file mode 100644 index 000000000..d29fc08f0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/sincos_c.h @@ -0,0 +1,78 @@ +/* Common constants for Sin/Cos/Tan + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __SINCOS_C2__ +#define __SINCOS_C2__ + +// +// Common constants used to evaluate sind2/cosd2/tand2 +// +#define _SINCOS_CC0D 0.00000000206374484196 +#define _SINCOS_CC1D -0.00000027555365134677 +#define _SINCOS_CC2D 0.00002480157946764225 +#define _SINCOS_CC3D -0.00138888888730525966 +#define _SINCOS_CC4D 0.04166666666651986722 +#define _SINCOS_CC5D -0.49999999999999547304 + +#define _SINCOS_SC0D 0.00000000015893606014 +#define _SINCOS_SC1D -0.00000002505069049138 +#define _SINCOS_SC2D 0.00000275573131527032 +#define _SINCOS_SC3D -0.00019841269827816117 +#define _SINCOS_SC4D 0.00833333333331908278 +#define _SINCOS_SC5D -0.16666666666666612594 + +#define _SINCOS_KC1D (13176794.0 / 8388608.0) +#define _SINCOS_KC2D 7.5497899548918821691639751442098584e-8 + + +// +// Common constants used to evaluate sinf4/cosf4/tanf4 +// +#define _SINCOS_CC0 -0.0013602249f +#define _SINCOS_CC1 0.0416566950f +#define _SINCOS_CC2 -0.4999990225f +#define _SINCOS_SC0 -0.0001950727f +#define _SINCOS_SC1 0.0083320758f +#define _SINCOS_SC2 -0.1666665247f + +#define _SINCOS_KC1 1.57079625129f +#define _SINCOS_KC2 7.54978995489e-8f + +// +// Common constants used to evaluate sinf4est/cosf4est +// +#define 
_SINCOS_R1 -0.1666665668f +#define _SINCOS_R2 0.8333025139e-2f +#define _SINCOS_R3 -0.1980741872e-3f +#define _SINCOS_R4 0.2601903036e-5f + +#define _SINCOS_C1 (201.0f/64.0f) +#define _SINCOS_C2 9.67653589793e-4f + +#endif diff --git a/Extras/simdmathlibrary/spu/sincosf4.c b/Extras/simdmathlibrary/spu/sincosf4.c new file mode 100644 index 000000000..0a4fc9d94 --- /dev/null +++ b/Extras/simdmathlibrary/spu/sincosf4.c @@ -0,0 +1,104 @@ +/* sincosf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sincos_c.h" + +// +// Computes both the sine and cosine of the all four slots of x +// by using a polynomial approximation. +// +void sincosf4 (vector float x, vector float *s, vector float *c) +{ + vec_float4 xl,xl2,xl3; + vec_int4 q; + vec_int4 offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000))); + q = spu_convts(xl,0); + + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. 
+ // + offsetSin = spu_and(q,spu_splats((int)0x3)); + offsetCos = spu_add(spu_splats(1),offsetSin); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x); + xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = spu_mul(xl,xl); + xl3 = spu_mul(xl2,xl); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1)); + vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1)); + + vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2)); + vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2)); + + vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f)); + vec_float4 sx = spu_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uchar16 sinMask = (vec_uchar16)spu_cmpeq(spu_and(offsetSin,(int)0x1),spu_splats((int)0)); + vec_uchar16 cosMask = (vec_uchar16)spu_cmpeq(spu_and(offsetCos,(int)0x1),spu_splats((int)0)); + *s = spu_sel(cx,sx,sinMask); + *c = spu_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + sinMask = (vec_uchar16)spu_cmpeq(spu_and(offsetSin,(int)0x2),spu_splats((int)0)); + cosMask = (vec_uchar16)spu_cmpeq(spu_and(offsetCos,(int)0x2),spu_splats((int)0)); + + *s = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)*s),*s,sinMask); + *c = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)*c),*c,cosMask); + +} + diff --git a/Extras/simdmathlibrary/spu/sind2.c b/Extras/simdmathlibrary/spu/sind2.c new file mode 100644 index 000000000..5df3417ac --- /dev/null +++ b/Extras/simdmathlibrary/spu/sind2.c @@ -0,0 +1,130 @@ +/* sind2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sincos_c.h" + +// +// Computes the sine of the each of two double slots. 
+// +vector double sind2 (vector double x) +{ + vec_double2 xl,xl2,xl3,res; + vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull); + vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull); + + // Range reduction using : x = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(x))*sign(x)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull))); + vec_float4 xf = spu_roundtf(xl); + vec_int4 q = spu_convts(xf,0); + q = spu_shuffle(q,q,copyEven); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = spu_and(q,spu_splats(0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_double2 qd = spu_extend(qf); + vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x); + xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1); + + // Check if |xl| is a really small number + // + vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull)); + vec_ullong2 isTiny = (vec_ullong2)isgreaterd2 (tiny,absXl); + + // Compute x^2 and x^3 + // + xl2 = spu_mul(xl,xl); + xl3 = spu_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and + // sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5) + // + + vec_double2 ct0 = spu_mul(xl2,xl2); + vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D)); + vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D)); + vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D)); + vec_double2 st1 = 
spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D)); + vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D)); + vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D)); + vec_double2 ct4 = spu_madd(ct2,ct0,ct3); + vec_double2 st4 = spu_madd(st2,ct0,st3); + vec_double2 ct5 = spu_mul(ct0,ct0); + + vec_double2 ct6 = spu_madd(ct5,ct1,ct4); + vec_double2 st6 = spu_madd(ct5,st1,st4); + + vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0)); + vec_double2 sx = spu_madd(st6,xl3,xl); + + // Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0 + // + sx = spu_sel(sx,xl,isTiny); + cx = spu_sel(cx,spu_splats(1.0),isTiny); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); + res = spu_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); + mask2 = spu_shuffle(mask2,mask2,copyEven); + res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2); + + // if input = +/-Inf return NAN + // + res = spu_sel(res, nan, isnand2 (x)); + + // if input = 0 or denorm return input + // + vec_ullong2 zeroMask = is0denormd2 (x); + res = spu_sel(res,x,zeroMask); + + + return res; +} + diff --git a/Extras/simdmathlibrary/spu/sinf4.c b/Extras/simdmathlibrary/spu/sinf4.c new file mode 100644 index 000000000..6ad160825 --- /dev/null +++ b/Extras/simdmathlibrary/spu/sinf4.c @@ -0,0 +1,97 @@ +/* sinf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include "sincos_c.h" + +// +// Computes the sine of each of the four slots by using a polynomia approximation +// +vector float sinf4 (vector float x) +{ + vec_float4 xl,xl2,xl3,res; + vec_int4 q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000))); + q = spu_convts(xl,0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = spu_and(q,spu_splats((int)0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x); + xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = spu_mul(xl,xl); + xl3 = spu_mul(xl2,xl); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1)); + vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1)); + + vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2)); + vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2)); + + vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f)); + vec_float4 sx = spu_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); + res = spu_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); + res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2); + + return res; + +} + diff 
--git a/Extras/simdmathlibrary/spu/sqrtd2.c b/Extras/simdmathlibrary/spu/sqrtd2.c new file mode 100644 index 000000000..c08f4754e --- /dev/null +++ b/Extras/simdmathlibrary/spu/sqrtd2.c @@ -0,0 +1,101 @@ +/* sqrtd2 - for each of two double slots, compute square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +// +// Handles exceptional values as follows: +// NaN -> NaN +// -Inf -> Nan +// -Finite -> Nan +// Denormal inputs are treated as zero. + +vector double +sqrtd2 (vector double x) +{ + vec_ullong2 expmask, onemask, signmask, evenexp; + vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result; + vec_float4 halff, onef, manf, y0f, y1f; + + expmask = spu_splats(0x7ff0000000000000ull); + onemask = spu_splats(0x0010000000000000ull); + signmask = spu_splats(0x8000000000000000ull); + onef = spu_splats(1.0f); + one = spu_extend( onef ); + halff = spu_splats(0.5f); + half = spu_extend( halff ); + + // First compute reciprocal square root. + // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) ) + // where i = 0 when exponent is even and i = 1 when exponent is odd. + // + // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2: + // + // biased_exp = 1023 + exponent + // new_biased_exp = 1023 - (exponent+i)/2 + // = 1023 - (biased_exp-1023+i)/2 + // = (3069 - (biased_exp+i)) / 2 + + evenexp = spu_and( (vec_ullong2)x, onemask ); + man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask ); + + exp = spu_and( x, (vec_double2)expmask ); + nexp = spu_or( exp, (vec_double2)onemask ); + nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 ); + + // Compute mantissa part in single precision. + // Convert back to double and multiply with 2^(-(exponent+i)/2), then + // do two Newton-Raphson steps for full precision. 
+ + manf = spu_roundtf( man ); + y0f = spu_rsqrte( manf ); + y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f ); + y1 = spu_mul( spu_extend( y1f ), nexp ); + y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 ); + y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 ); + + // Multiply by input to get square root. + + y3 = spu_mul( y3, x ); + + // Choose iterated result or special value. + + zero = spu_and( x, (vec_double2)signmask ); + inf = (vec_double2)expmask; + nan = (vec_double2)spu_splats(0x7ff8000000000000ull); + + result = spu_sel( y3, inf, isinfd2 ( x ) ); + result = spu_sel( result, nan, isnand2 ( x ) ); + result = spu_sel( result, zero, is0denormd2 ( x ) ); + + return result; +} + diff --git a/Extras/simdmathlibrary/spu/sqrtf4.c b/Extras/simdmathlibrary/spu/sqrtf4.c new file mode 100644 index 000000000..8e8b94b45 --- /dev/null +++ b/Extras/simdmathlibrary/spu/sqrtf4.c @@ -0,0 +1,47 @@ +/* sqrtf4 - for each of four float slots, compute square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +// Undefined if input < 0. + +#include +#include + +vector float sqrtf4 (vector float x) +{ + // Reciprocal square root estimate and 1 Newton-Raphson iteration. + + vec_float4 y0, y0x, y0xhalf; + vec_float4 oneish = (vec_float4)spu_splats(0x3f800001); + + y0 = spu_rsqrte( x ); + y0x = spu_mul( y0, x ); + y0xhalf = spu_mul( y0x, spu_splats(0.5f) ); + return spu_madd( spu_nmsub( y0, y0x, oneish ), y0xhalf, y0x ); +} + diff --git a/Extras/simdmathlibrary/spu/tand2.c b/Extras/simdmathlibrary/spu/tand2.c new file mode 100644 index 000000000..479436d31 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tand2.c @@ -0,0 +1,117 @@ +/* tand2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#define _TAND2_CC0 -0.00020844956382258822 +#define _TAND2_CC1 0.02334489464693293380 +#define _TAND2_CC2 -0.46161689768996201755 +#define _TAND2_SC0 -0.00000748373924372997 +#define _TAND2_SC1 0.00280592875035233052 +#define _TAND2_SC2 -0.12828356435663158978 +#define _TAND2_KC1 (13176794.0 / 8388608.0) +#define _TAND2_KC2 7.5497899548918821691639751442098584e-8 + +/* + * Computes the tangent of the given angles by first reducing the + * range to [-pi/4..pi/4] and performing the appropriate sin/cos ratio + */ +vector double +tand2 (vector double x) +{ + vec_double2 xl,x2,x3,res; + vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull); + vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744)); + + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) 
+ // + xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull))); + vec_float4 xf = spu_roundtf(xl); + vec_int4 q = spu_convts(xf,0); + q = spu_shuffle(q,q,copyEven); + + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_double2 qd = spu_extend(qf); + vec_double2 p1 = spu_nmsub(qd,spu_splats(_TAND2_KC1),x); + xl = spu_nmsub(qd,spu_splats(_TAND2_KC2),p1); + + // Compute x^2 and x^3 + // + x2 = spu_mul(xl,xl); + x3 = spu_mul(x2,xl); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + x2 * ((C0 * x2 + C1) * x2 + C2), and + // sx = x + x3 * ((S0 * x2 + S1) * x2 + S2) + // + vec_double2 ct1 = spu_madd(spu_splats(_TAND2_CC0),x2,spu_splats(_TAND2_CC1)); + vec_double2 st1 = spu_madd(spu_splats(_TAND2_SC0),x2,spu_splats(_TAND2_SC1)); + + vec_double2 ct2 = spu_madd(ct1,x2,spu_splats(_TAND2_CC2)); + vec_double2 st2 = spu_madd(st1,x2,spu_splats(_TAND2_SC2)); + + vec_double2 cx = spu_madd(ct2,x2,spu_splats(1.0)); + vec_double2 sx = spu_madd(st2,x3,xl); + + + // Compute both cx/sx and sx/cx + // + vec_double2 cxosx = divd2(cx,sx); + vec_double2 sxocx = divd2(sx,cx); + + vec_double2 ncxosx = (vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)cxosx); + + // For odd numbered quadrants return -cx/sx , otherwise return + // sx/cx + // + vec_ullong2 mask = (vec_ullong2)spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0)); + res = spu_sel(ncxosx,sxocx,mask); + + // If input = +/-Inf return NAN + // + res = spu_sel(res,nan,isinfd2 (x)); + + // If input =0 or denorm return input + // + res = spu_sel(res,x, is0denormd2 (x)); + + return res; +} diff --git a/Extras/simdmathlibrary/spu/tanf4.c b/Extras/simdmathlibrary/spu/tanf4.c new file mode 100644 index 000000000..10c53c4fe --- /dev/null +++ b/Extras/simdmathlibrary/spu/tanf4.c @@ -0,0 +1,91 @@ +/* tanf4 - for each of four float slots, compute the tangent by using a polynomial approximation. 
+ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + + +#define _TAN_KC1 1.57079625129f +#define _TAN_KC2 7.54978995489e-8f + +vector float +tanf4 (vector float x) +{ + vector float xl,x2,x3,res; + vector signed int q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = spu_mul(x, spu_splats(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(x))*sign(x)) + // + xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000))); + q = spu_convts(xl,0); + + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = spu_convtf(q,0); + vec_float4 p1 = spu_nmsub(qf,spu_splats(_TAN_KC1),x); + xl = spu_nmsub(qf,spu_splats(_TAN_KC2),p1); + + // Compute x^2 and x^3 + // + x2 = spu_mul(xl,xl); + x3 = spu_mul(x2,xl); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + x2 * (C0 * x2 + C1), and + // sx = xl + x3 * S0 + // + vec_float4 ct2 = spu_madd(spu_splats( 0.0097099364f),x2,spu_splats(-0.4291161787f)); + + vec_float4 cx = spu_madd(ct2,x2,spu_splats(1.0f)); + vec_float4 sx = spu_madd(spu_splats(-0.0957822992f),x3,xl); + + + // Compute both cx/sx and sx/cx + // + vec_float4 cxosx = divf4(cx,sx); + vec_float4 sxocx = divf4(sx,cx); + + vec_float4 ncxosx = (vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)cxosx); + + // For odd numbered quadrants return -cx/sx , otherwise return + // sx/cx + // + vec_uchar16 mask = (vec_uchar16)spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0)); + res = spu_sel(ncxosx,sxocx,mask); + + return res; +} diff --git a/Extras/simdmathlibrary/spu/tests/Makefile b/Extras/simdmathlibrary/spu/tests/Makefile new file mode 100644 index 000000000..014fb4345 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/Makefile @@ -0,0 +1,119 @@ +# Makefile for testsuite for the SPU SIMD math library +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +TESTS = fabsd2 fabsf4 truncf4 divf4 recipd2 divd2 sqrtf4 \ + absi4 sqrtd2 rsqrtf4 rsqrtd2 copysignf4 remainderf4 \ + recipf4 copysignd2 negatef4 negated2 modff4 ceilf4 \ + fminf4_fmaxf4 floorf4 negatei4 divi4 llroundd2 llroundf4 \ + llrintf4 isequalf4 isequald2 islessgreaterf4 islessgreaterd2 \ + islessf4 islessd2 isgreaterf4 isgreaterd2 islessequalf4 islessequald2 \ + isgreaterequalf4 isgreaterequald2 isnanf4 isnand2 isinff4 isinfd2 \ + isfinitef4 isfinited2 isnormalf4 isnormald2 isunorderedf4 isunorderedd2 \ + is0denormf4 is0denormd2 signbitf4 signbitd2 llrintd2 \ + roundf4 iroundf4 rintf4 irintf4 fmind2_fmaxd2 fdimd2 \ + fmad2 nextafterd2 nextafterf4 fpclassifyf4 fpclassifyd2 nearbyintd2 nearbyintf4 \ + llabsi2 truncd2 roundd2 rintd2 negatell2 hypotf4 hypotd2 divu4 modfd2 \ + lldivu2 ceild2 floord2 ldexpd2 ilogbd2 ilogbf4 logbf4 logbd2 scalbnf4 \ + scalbllnd2 lldivi2 frexpf4 frexpd2 remquof4 remquod2 fmodd2 remainderd2 + + +ALL_TESTS = $(TESTS) + +INCLUDES_SPU = -I../../ + +CROSS_SPU = spu- +AR_SPU = $(CROSS_SPU)ar +CC_SPU = $(CROSS_SPU)gcc +CXX_SPU = $(CROSS_SPU)g++ +TEST_CMD_SPU = + +CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall +LDFLAGS_SPU=-L../ -l$(LIB_BASE) -lm + +MAKE_DEFS = \ + LIB_BASE='$(LIB_BASE)' \ + LIB_NAME='$(LIB_NAME)' \ + STATIC_LIB='$(STATIC_LIB)' \ + CROSS_SPU='$(CROSS_SPU)' \ + AR_SPU='$(AR_SPU)' \ + CC_SPU='$(CC_SPU)' \ + CXX_SPU='$(CXX_SPU)' \ + TEST_CMD_SPU='$(TEST_CMD_SPU)' + +LIB_BASE = simdmath +LIB_NAME = lib$(LIB_BASE) +STATIC_LIB = $(LIB_NAME).a + +TEST_CMD = $(TEST_CMD_SPU) + +COMMON_OBJS = testutils.o + +all: $(ALL_TESTS) + + +$(TESTS): %: %.o ../$(STATIC_LIB) $(COMMON_OBJS) + $(CC_SPU) $@.o $(COMMON_OBJS) $(LDFLAGS_SPU) -o $@ + +clean: + rm -f *.o + rm -f $(TESTS) + rm -f core* + +check: $(ALL_TESTS) + for test in $(ALL_TESTS); do \ + echo "TEST $${test}"; \ + if $(TEST_CMD) ./$${test}; then \ + pass="$$pass $$test"; \ + else \ + fail="$$fail $$test"; \ + fi \ + done; \ + echo; echo "PASS:$$pass"; echo "FAIL:$$fail"; \ + test -z 
"$$fail" + +../$(STATIC_LIB): + cd ../;$(MAKE) $(MAKE_DEFS) $(STATIC_LIB) + +%.o: %.c + $(CC_SPU) $(CFLAGS_SPU) -c $< + +#---------- +# C++ +#---------- +%.o: %.C + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cpp + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cc + $(CXX_SPU) $(CFLAGS_SPU) -c $< + +%.o: %.cxx + $(CXX_SPU) $(CFLAGS_SPU) -c $< + diff --git a/Extras/simdmathlibrary/spu/tests/absi4.c b/Extras/simdmathlibrary/spu/tests/absi4.c new file mode 100644 index 000000000..bd150e4e1 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/absi4.c @@ -0,0 +1,90 @@ +/* Test absi4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040908101807EJL","EJL", "abs"); + + int x0n = hide_int(0); + int x0p = hide_int(0); + int x1n = hide_int(-1); + int x1p = hide_int(1); + int x2n = hide_int(-83532); + int x2p = hide_int(83532); + + vec_int4 x0n_v = spu_splats(x0n); + vec_int4 x0p_v = spu_splats(x0p); + vec_int4 x1n_v = spu_splats(x1n); + vec_int4 x1p_v = spu_splats(x1p); + vec_int4 x2n_v = spu_splats(x2n); + vec_int4 x2p_v = spu_splats(x2p); + + int res; + vec_int4 res_v; + + TEST_START("absi4"); + res_v = absi4(x0n_v); + TEST_CHECK("20040908103824EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = absi4(x0p_v); + TEST_CHECK("20040908103903EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = absi4(x1n_v); + TEST_CHECK("20040908103905EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = absi4(x1p_v); + TEST_CHECK("20040908114003EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = absi4(x2n_v); + TEST_CHECK("20040908114714EJL", allequal_int4( res_v, x2p_v ), 0); + res_v = absi4(x2p_v); + TEST_CHECK("20040908114715EJL", allequal_int4( res_v, x2p_v ), 0); + + TEST_START("abs"); + res = abs(x0n); + TEST_CHECK("20040908114718EJL", res == x0p, 0); + res = abs(x0p); + TEST_CHECK("20040908114719EJL", res == x0p, 0); + res = abs(x1n); + TEST_CHECK("20040908114720EJL", res == x1p, 0); + res = abs(x1p); + TEST_CHECK("20040908114721EJL", res == x1p, 0); + 
res = abs(x2n); + TEST_CHECK("20040908114722EJL", res == x2p, 0); + res = abs(x2p); + TEST_CHECK("20040908114723EJL", res == x2p, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/ceild2.c b/Extras/simdmathlibrary/spu/tests/ceild2.c new file mode 100644 index 000000000..7712c5b1c --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/ceild2.c @@ -0,0 +1,156 @@ +/* Test roundd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ +/** + * + *@@ ceild2for each of two doule slots, + * round up to smallest integer not less than the value. + * + *@brief + * boundary test for ceild2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + * + **/ +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef union { + struct { + double xxx[2]; + double ans[2]; + } dbl; + struct { + unsigned long long xxx[2]; + unsigned long long ans[2]; + } ull; +} TestVec_Roundd2; + +int main() +{ + TestVec_Roundd2 test_a[] = { + { + ull:{ + // 0 -> 0 , -0 -> -0 + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // -Inf -> -Inf , Inf -> Inf + {0xFFF0000000000000ULL,0x7FF0000000000000ULL}, + {0xFFF0000000000000ULL,0x7FF0000000000000ULL} + } + },{ + ull:{ + // MAX -> MAX , MIN -> MIN + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x0000000000000001ULL,0x8000000000000010ULL}, + {0x3ff0000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x800FFFFFFFFFFFFFULL,0x000FFFFFFFFFFFFFULL}, + {0x8000000000000000ULL,0x3ff0000000000000ULL} + } + },{ + ull:{ + // border + {0x4320000000000001ULL,0xC320000000000001ULL}, + {0x4320000000000002ULL,0xC320000000000000ULL} + } + },{ + dbl:{ + {1.0, -1.0}, + {1.0, -1.0} + } + },{ + dbl:{ + {0.5, -0.5}, + {1.0, -0.0} + } + },{ + dbl:{ + {-2.75, 3.2}, + {-2.0, 4.0} + } + },{ + ull:{ + // Nan + {0xFFF0000000000001ULL,0x7FF0000000000001ULL}, + {0xFFF0000000000001ULL,0x7FF0000000000001ULL} + } + },{ + ull:{ + {0ULL,0ULL}, + {0ULL,0ULL} + } + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + //vec_double2 input; + + TEST_SET_START("51651906100000NM","CED", "ceild2"); + + TEST_START("ceild2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].ull.xxx[0] == 0) && 
(test_a[ii].ull.xxx[1] == 0) ) break; + //input = *((vec_double2 *)&test_a[ii].dbl.xxx[0]); + res_v = ceild2 (*((vec_double2 *)&test_a[ii].dbl.xxx[0]) ); + sprintf(msg,"5165190610%04dCED", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, (vec_llong2)*((vec_double2 *)&test_a[ii].dbl.ans[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/ceilf4.c b/Extras/simdmathlibrary/spu/tests/ceilf4.c new file mode 100644 index 000000000..d130176e2 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/ceilf4.c @@ -0,0 +1,108 @@ +/* Test ceilf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040916103300EJL","EJL", "ceilf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4b000000; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. + + float x0 = hide_float(0.91825f); + float x0i = hide_float(1.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 x0i_v = spu_splats(x0i); + vec_float4 x1_v = spu_splats(x1); + vec_float4 x1i_v = spu_splats(x1i); + vec_float4 x2_v = spu_splats(x2); + vec_float4 x2i_v = spu_splats(x2i); + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3i_v = spu_splats(x3i); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4i_v = spu_splats(x4i); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5i_v = spu_splats(x5i); + + float res; + vec_float4 res_v; + + TEST_START("ceilf4"); + res_v = ceilf4(x0_v); + TEST_CHECK("20040916103310EJL", 
allequal_float4( res_v, x0i_v ), 0); + res_v = ceilf4(x1_v); + TEST_CHECK("20040916103324EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = ceilf4(x2_v); + TEST_CHECK("20040916103334EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = ceilf4(x3_v); + TEST_CHECK("20040916103341EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = ceilf4(x4_v); + TEST_CHECK("20040916103350EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = ceilf4(x5_v); + TEST_CHECK("20040916103357EJL", allequal_float4( res_v, x5i_v ), 0); + + TEST_START("ceilf"); + res = ceilf(x0); + TEST_CHECK("20040916103407EJL", res == x0i, 0); + res = ceilf(x1); + TEST_CHECK("20040916103418EJL", res == x1i, 0); + res = ceilf(x2); + TEST_CHECK("20040916103428EJL", res == x2i, 0); + res = ceilf(x3); + TEST_CHECK("20040916103437EJL", res == x3i, 0); + res = ceilf(x4); + TEST_CHECK("20040916103448EJL", res == x4i, 0); + res = ceilf(x5); + TEST_CHECK("20040916103457EJL", res == x5i, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/common-test.h b/Extras/simdmathlibrary/spu/tests/common-test.h new file mode 100644 index 000000000..4f931deca --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/common-test.h @@ -0,0 +1,201 @@ +/* SIMD math library - common testsuite part for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +static inline unsigned int clock() +{ + unsigned int ret; + int tmp = 0; + __asm __volatile__ ( "syscall %0,%1,0x2b\n" + : "=r" (ret) + : "r" (tmp) + : "memory" ); + return (ret); +} +// Test files begin with TEST_SET_START("your initials","test set description") +// Individual tests begin with TEST_START("name of test") +// and end with TEST_PASS(), TEST_FAIL("reason for failure") or TEST_CHECK() +// Or you can run a test encapsulated in a function with: +// TEST_FUNCTION("name of test", function(), "reason for failure") +// +// The clock starts when you call TEST_START and stops with TEST_PASS, TEST_FAIL or TEST_CHECK +// After a start there can be several PASS, FAIL or CHECK calls, each one counts as a test, time is measured from the prior call +// +char + *__initials, // Test owner's initials + *__description, // short descriptive name for this test set + *__name, // name of the currently running test + *__set_id; // id of the the test set +int +// __zip=0, + 
__success=1, // set to 0 if any tests failed + __count, // Total number of tests run + __passed; // Total number of tests passed +unsigned int + __ttemp, + __time, // For timing tests (usually start time of last test) + __ttime; // Cumulative test runtime NOT counting runtime of the TEST macros + +// TEST_SET_START +// Call at the start of a set of related tests to identify them +// Prints a "start of set banner message" +// set_id - unique test set identifyer a time in the format yyyymmddhhmmss followed by your initials ie: 20040716104615GAC +// initials - your initials +// description - brief descriptive name for this test set +#define TEST_SET_START(set_id,initials,description) \ + do { \ + __set_id=set_id; \ + __initials=initials; \ + __description=description; \ + __count=0; \ + __passed=0; \ + __time=0; \ + __ttime=0; \ + printf("0\t%s\t%d\t%s\tSTART\tpassed\ttotal\ttime\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,__set_id, __description); \ + } while(0) + +// TEST_START +// Begins a test, and starts the clock +// name - brief name for this test +#define TEST_START(name) \ + do { \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + __name=name; \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + } while(0) + +// TEST_PASS +// Indicates the test passed +// test_id - unique test ID number, same format as the set_id number +// This should match the id provided to the matching TEST_FAIL call +#define TEST_PASS(test_id) \ + do { \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + } while(0) + +// __time=clock(); +// __asm __volatile__ 
( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (__zip) : "memory" ); +// __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (__zip) : "memory" ); + +// TEST_FAIL +// Indicates the test failed +// test_id - unique test ID number, same format as the set_id number +// This should match the id provided to the matching TEST_PASS call +// why - brief description of why it failed +#define TEST_FAIL(test_id,why,error_code) \ + do { \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,error_code); \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + } while(0) + +// TEST_CHECK +// Passes or fails the test after evaluating the "test" argument (just like assert but without terminating the program) +// The clock is immediately stopped so the time required to evaluate "test" will NOT be included in the reported time +// If the test failed, the reason will be printed as FAILED BECAUSE: check (value of "test") failed +// test_id - unique test ID number, same format as the set_id number +// test - expression evaluating to true/false +#define TEST_CHECK(test_id,test,error_code) \ + do { \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + if(test) \ + { \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + } \ + else \ + { \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: check %s failed\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,#test,error_code); \ + } \ + __asm 
__volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + } while(0) + +// TEST_FUNCTION +// Runs a test encapsulated in a function that returns 0 if the test passed and an error number if it failed +// The clock is started on calling the function and stopped as soon as it returns so the branching logic will not be included in the time +// test_id - unique test ID number, same format as the set_id number +// name - brief name for the test +// func - function invocation (should include parenthesis, may have arguments) +// why - brief description to print if the test fails +#define TEST_FUNCTION(test_id,name,func,why) \ + do { \ + TEST_START(name); \ + int result=func; \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ + __time=__ttemp-__time; \ + __ttime+=__time; \ + __count++; \ + if(result==0) \ + { \ + __passed++; \ + printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ + } \ + else \ + { \ + __success=0; \ + printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,result); \ + } \ + __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ + } while(0) + +// TEST_SET_DONE +// Ends a set of tests, prints out the closing banner (OK if all tests pass, PROBLEM if any fail) +// Also prints count of tests passed, tests run and total time +#define TEST_SET_DONE() \ + do { \ + printf("9\t%s\t%d\t%s\t%s\t%d\t%d\t%d\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,(__count==__passed)?"OK":"PROBLEM",__passed,__count,__ttime,__set_id,__description); \ + } while(0) + +// TEST_EXIT +// Call this ONCE at the very end of the test program, it calls "exit" to return +// EXIT_SUCCESS if all tests passed or EXIT_FAILURE if any tests failed. 
+// This allows the makefile/shell script running the tests to know which ones failed +#define TEST_EXIT() \ + do { \ + if(__success) \ + exit(0); \ + else \ + exit(-1); \ + } while (0) diff --git a/Extras/simdmathlibrary/spu/tests/copysignd2.c b/Extras/simdmathlibrary/spu/tests/copysignd2.c new file mode 100644 index 000000000..0aa8312c3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/copysignd2.c @@ -0,0 +1,74 @@ +/* Test copysignd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" +int main() +{ + TEST_SET_START("20040928185245EJL","EJL", "copysign"); + + double x0m = hide_double(1989.0); + double x0s = hide_double(-319875.0); + double x0c = hide_double(-1989.0); + double x1m = hide_double(9013.0); + double x1s = hide_double(185.0); + double x1c = hide_double(9013.0); + + vec_double2 x0m_v = spu_splats(x0m); + vec_double2 x0s_v = spu_splats(x0s); + vec_double2 x0c_v = spu_splats(x0c); + + vec_double2 x1m_v = spu_splats(x1m); + vec_double2 x1s_v = spu_splats(x1s); + vec_double2 x1c_v = spu_splats(x1c); + + double res; + vec_double2 res_v; + + TEST_START("copysignd2"); + res_v = copysignd2( x0m_v, x0s_v ); + TEST_CHECK("20040928185248EJL", allequal_double2( res_v, x0c_v ), 0); + res_v = copysignd2( x1m_v, x1s_v ); + TEST_CHECK("20040928185251EJL", allequal_double2( res_v, x1c_v ), 0); + + TEST_START("copysign"); + res = copysign( x0m, x0s ); + TEST_CHECK("20040928185253EJL", res == x0c, 0); + res = copysign( x1m, x1s ); + TEST_CHECK("20040928185256EJL", res == x1c, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/copysignf4.c b/Extras/simdmathlibrary/spu/tests/copysignf4.c new file mode 100644 index 000000000..148ecca5b --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/copysignf4.c @@ -0,0 +1,75 @@ +/* Test copysignf4 for SPU + Copyright (C) 2006, 2007 Sony Computer 
Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040917114054EJL", "EJL", "copysignf"); + + float x0m = hide_float(1989.0f); + float x0s = hide_float(-319875.0f); + float x0c = hide_float(-1989.0f); + float x1m = hide_float(9013.0f); + float x1s = hide_float(185.0f); + float x1c = hide_float(9013.0f); + + vec_float4 x0m_v = spu_splats(x0m); + vec_float4 x0s_v = spu_splats(x0s); + vec_float4 x0c_v = spu_splats(x0c); + + vec_float4 x1m_v = spu_splats(x1m); + vec_float4 x1s_v = spu_splats(x1s); + vec_float4 x1c_v = spu_splats(x1c); + + float res; + vec_float4 res_v; + + TEST_START("copysignf4"); + res_v = copysignf4( x0m_v, x0s_v ); + TEST_CHECK("20040917114058EJL", allequal_float4( res_v, x0c_v ), 0); + res_v = copysignf4( x1m_v, x1s_v ); + TEST_CHECK("20040917114100EJL", allequal_float4( res_v, x1c_v ), 0); + + TEST_START("copysignf"); + res = copysignf( x0m, x0s ); + TEST_CHECK("20040917114102EJL", res == x0c, 0); + res = copysignf( x1m, x1s ); + TEST_CHECK("20040917114104EJL", res == x1c, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/divd2.c b/Extras/simdmathlibrary/spu/tests/divd2.c new file mode 100644 index 000000000..9e796c94b --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/divd2.c @@ -0,0 +1,153 @@ +/* Test divd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20040927182952EJL","EJL", "divd2"); + + unsigned long long i3n = 0x747f7fefa0c3274aull; + unsigned long long i3d = 0x7606a4533cf5605eull; + unsigned long long i3r = 0x3e66426af0ec01b0ull; + unsigned long long i4n = 0x4c042c295376566eull; + unsigned long long i4d = 0x39b3720562510408ull; + unsigned long long i4r = 0x52409928d3244077ull; + unsigned long long i5n = 0x6911a64538a389aeull; + unsigned long long i5d = 0x1ac4d062d451c99dull; + unsigned long long i5r = 0x7ff0000000000000ull; + unsigned long long i6n = 0x5b112f9d39e7de27ull; + unsigned long long i6d = 0x5659f8dbe4993d7cull; + unsigned long long i6r = 0x44a52cb9b9d2b2cdull; + unsigned long long i7n = 0x7410065c772e25daull; + unsigned long long i7d = 0x6a576b936e5f1034ull; + unsigned long long i7r = 0x49a5e53936c1b556ull; + unsigned long long i8n = 0x3605d9b2916be0f5ull; + unsigned long long 
i8d = 0x61f25e39867b0a9eull; + unsigned long long i8r = 0x1403088aa08482f2ull; + + double x0n = hide_double(-1.0/0.0); // -Inf/ Inf == NaN + double x0d = hide_double(1.0/0.0); + + double x1n = hide_double(0.0); // 0 / 0 == NaN + double x1d = hide_double(-0.0); + + double x2n = hide_double(0.0/0.0); // NaN / 2 == NaN + double x2d = hide_double(2.0); + + double x3n = hide_double(make_double(i3n)); + double x3d = hide_double(make_double(i3d)); + double x3r = hide_double(make_double(i3r)); + + double x4n = hide_double(make_double(i4n)); + double x4d = hide_double(make_double(i4d)); + double x4r = hide_double(make_double(i4r)); + + double x5n = hide_double(make_double(i5n)); + double x5d = hide_double(make_double(i5d)); + double x5r = hide_double(make_double(i5r)); + + double x6n = hide_double(make_double(i6n)); + double x6d = hide_double(make_double(i6d)); + double x6r = hide_double(make_double(i6r)); + + double x7n = hide_double(make_double(i7n)); + double x7d = hide_double(make_double(i7d)); + double x7r = hide_double(make_double(i7r)); + + double x8n = hide_double(make_double(i8n)); + double x8d = hide_double(make_double(i8d)); + double x8r = hide_double(make_double(i8r)); + + vec_double2 x0n_v = spu_splats(x0n); + vec_double2 x0d_v = spu_splats(x0d); + + vec_double2 x1n_v = spu_splats(x1n); + vec_double2 x1d_v = spu_splats(x1d); + + vec_double2 x2n_v = spu_splats(x2n); + vec_double2 x2d_v = spu_splats(x2d); + + vec_double2 x3n_v = spu_splats(x3n); + vec_double2 x3d_v = spu_splats(x3d); + vec_double2 x3r_v = spu_splats(x3r); + + vec_double2 x4n_v = spu_splats(x4n); + vec_double2 x4d_v = spu_splats(x4d); + vec_double2 x4r_v = spu_splats(x4r); + + vec_double2 x5n_v = spu_splats(x5n); + vec_double2 x5d_v = spu_splats(x5d); + vec_double2 x5r_v = spu_splats(x5r); + + vec_double2 x6n_v = spu_splats(x6n); + vec_double2 x6d_v = spu_splats(x6d); + vec_double2 x6r_v = spu_splats(x6r); + + vec_double2 x7n_v = spu_splats(x7n); + vec_double2 x7d_v = spu_splats(x7d); + 
vec_double2 x7r_v = spu_splats(x7r); + + vec_double2 x8n_v = spu_splats(x8n); + vec_double2 x8d_v = spu_splats(x8d); + vec_double2 x8r_v = spu_splats(x8r); + + vec_double2 res_v; + + TEST_START("divd2"); + res_v = divd2(x0n_v, x0d_v); + TEST_CHECK("20040927183001EJL", allnan_double2( res_v ), 0); + res_v = divd2(x1n_v, x1d_v); + TEST_CHECK("20040927183003EJL", allnan_double2( res_v ), 0); + res_v = divd2(x2n_v, x2d_v); + TEST_CHECK("20040927183006EJL", allnan_double2( res_v ), 0); + res_v = divd2(x3n_v, x3d_v); + TEST_CHECK("20040927183008EJL", allequal_ulps_double2( res_v, x3r_v, 1 ), 0 ); + res_v = divd2(x4n_v, x4d_v); + TEST_CHECK("20040927183010EJL", allequal_ulps_double2( res_v, x4r_v, 1 ), 0 ); + res_v = divd2(x5n_v, x5d_v); + TEST_CHECK("20040927183012EJL", allequal_ulps_double2( res_v, x5r_v, 1 ), 0 ); + res_v = divd2(x6n_v, x6d_v); + TEST_CHECK("20040927183014EJL", allequal_ulps_double2( res_v, x6r_v, 1 ), 0 ); + res_v = divd2(x7n_v, x7d_v); + TEST_CHECK("20040927183016EJL", allequal_ulps_double2( res_v, x7r_v, 1 ), 0 ); + res_v = divd2(x8n_v, x8d_v); + TEST_CHECK("20040927183018EJL", allequal_ulps_double2( res_v, x8r_v, 1 ), 0 ); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/divf4.c b/Extras/simdmathlibrary/spu/tests/divf4.c new file mode 100644 index 000000000..e60b668db --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/divf4.c @@ -0,0 +1,128 @@ +/* Test divf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040928105926EJL","EJL", "divf4"); + + unsigned int i0n = 0x75013340; + unsigned int i0d = 0x75e7753f; + unsigned int i0r = 0x3e8ee64b; + unsigned int i1n = 0x4c7fed5a; + unsigned int i1d = 0x3a0731f0; + unsigned int i1r = 0x51f24e86; + unsigned int i2n = 0x5b08b303; + unsigned int i2d = 0x562f5046; + unsigned int i2r = 0x44479d24; + unsigned int i3n = 0x748a9b87; + unsigned int i3d = 0x6b014b46; + unsigned int i3r = 0x49093864; + unsigned int i4n = 0x35dcf9d8; + unsigned int i4d = 0x6278d6e0; + unsigned int i4r = 0x12e355b5; + unsigned int i5n = 0x74d505fd; + unsigned int i5d = 0x61ef565e; + unsigned int i5r = 0x5263daa3; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); + float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = spu_splats(x0n); + vec_float4 x0d_v = spu_splats(x0d); + vec_float4 x0r_v = spu_splats(x0r); + + vec_float4 x1n_v = spu_splats(x1n); + vec_float4 x1d_v = spu_splats(x1d); + vec_float4 x1r_v = spu_splats(x1r); + + vec_float4 x2n_v = spu_splats(x2n); + vec_float4 x2d_v = spu_splats(x2d); + vec_float4 x2r_v = spu_splats(x2r); + + vec_float4 x3n_v = spu_splats(x3n); + vec_float4 x3d_v = spu_splats(x3d); + 
vec_float4 x3r_v = spu_splats(x3r); + + vec_float4 x4n_v = spu_splats(x4n); + vec_float4 x4d_v = spu_splats(x4d); + vec_float4 x4r_v = spu_splats(x4r); + + vec_float4 x5n_v = spu_splats(x5n); + vec_float4 x5d_v = spu_splats(x5d); + vec_float4 x5r_v = spu_splats(x5r); + + vec_float4 res_v; + + TEST_START("divf4"); + res_v = divf4(x0n_v, x0d_v); + TEST_CHECK("20040928105932EJL", allequal_ulps_float4( res_v, x0r_v, 2 ), 0); + res_v = divf4(x1n_v, x1d_v); + TEST_CHECK("20040928105934EJL", allequal_ulps_float4( res_v, x1r_v, 2 ), 0); + res_v = divf4(x2n_v, x2d_v); + TEST_CHECK("20040928105936EJL", allequal_ulps_float4( res_v, x2r_v, 2 ), 0); + res_v = divf4(x3n_v, x3d_v); + TEST_CHECK("20040928105938EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = divf4(x4n_v, x4d_v); + TEST_CHECK("20040928105940EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = divf4(x5n_v, x5d_v); + TEST_CHECK("20040928105943EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/divi4.c b/Extras/simdmathlibrary/spu/tests/divi4.c new file mode 100644 index 000000000..a56239b23 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/divi4.c @@ -0,0 +1,123 @@ +/* Test divi4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names
+   of its contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* NOTE(review): the three system-include targets below were stripped during
+   patch transfer (angle-bracket contents lost) — presumably <stdio.h>,
+   <stdlib.h> and <spu_intrinsics.h>; restore before applying this patch. */
+#include
+#include
+#include
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+/* Spot-checks divi4(): element-wise signed 32-bit division of two vec_int4,
+   returning a divi4_t with .quot and .rem vectors.  Each x<i>n / x<i>d input
+   pair below has a hand-computed expected quotient x<i>q and remainder x<i>r.
+   TEST_* / allequal_int4 come from common-test.h and testutils.h (not visible
+   here) — presumably they record pass/fail per check id; confirm. */
+int main()
+{
+   TEST_SET_START("20040928161739EJL","EJL", "divi4");
+
+   /* x0 divides by zero: the expected behaviour per this table is
+      quot == 0 and rem == numerator. */
+   int x0n = 0xffccb78d;
+   int x0d = 0x0 ;
+   int x0q = 0x0 ;
+   int x0r = 0xffccb78d;
+   int x1n = 0xff978333;
+   int x1d = 0xff976bb6;
+   int x1q = 0x0 ;
+   int x1r = 0xff978333;
+   int x2n = 0x5e146 ;
+   int x2d = 0xd14ebe0e;
+   int x2q = 0x0 ;
+   int x2r = 0x5e146 ;
+   int x3n = 0xf0e91618;
+   int x3d = 0xfddff7ac;
+   int x3q = 0x7 ;
+   int x3r = 0xffc95064;
+
+   int x4n = 0xf2128d9d;
+   int x4d = 0xe0f76 ;
+   int x4q = 0xffffff03;
+   int x4r = 0xfff7d53b;
+   int x5n = 0xda1ba2ce;
+   int x5d = 0x4c9 ;
+   int x5q = 0xfff814d3;
+   int x5r = 0xfffffd23;
+   int x6n = 0xdd4426a6;
+   int x6d = 0xf8d245cf;
+   int x6q = 0x4 ;
+   int x6r = 0xf9fb0f6a;
+   int x7n = 0xd1d5ae9 ;
+   int x7d = 0x333ab105;
+   int x7q = 0x0 ;
+   int x7r = 0xd1d5ae9 ;
+
+   int x8n = 0x3e0c6 ;
+   int x8d = 0xfff24255;
+   int x8q = 0x0 ;
+   int x8r = 0x3e0c6 ;
+   int x9n = 0xfd6fe27e;
+   int x9d = 0xf32454 ;
+   int x9q = 0xfffffffe;
+   int x9r = 0xff562b26;
+   int x10n =0xfb150f79;
+   int x10d =0xf521 ;
+   int x10q =0xfffffade;
+   int x10r =0xffff42db;
+   int x11n =0xfe88071f;
+   int x11d =0xfff937c2;
+   int x11q =0x37 ;
+   int x11r =0xfffd0c71;
+
+
+   /* Pack the twelve scalar cases into three 4-element test vectors. */
+   vec_int4 x0n_v = (vec_int4){ x0n, x1n, x2n, x3n };
+   vec_int4 x1n_v = (vec_int4){ x4n, x5n, x6n, x7n };
+   vec_int4 x2n_v = (vec_int4){ x8n, x9n, x10n, x11n };
+
+   vec_int4 x0d_v = (vec_int4){ x0d, x1d, x2d, x3d };
+   vec_int4 x1d_v = (vec_int4){ x4d, x5d, x6d, x7d };
+   vec_int4 x2d_v = (vec_int4){ x8d, x9d, x10d, x11d };
+
+   vec_int4 x0q_v = (vec_int4){ x0q, x1q, x2q, x3q };
+   vec_int4 x1q_v = (vec_int4){ x4q, x5q, x6q, x7q };
+   vec_int4 x2q_v = (vec_int4){ x8q, x9q, x10q, x11q };
+
+   vec_int4 x0r_v = (vec_int4){ x0r, x1r, x2r, x3r };
+   vec_int4 x1r_v = (vec_int4){ x4r, x5r, x6r, x7r };
+   vec_int4 x2r_v = (vec_int4){ x8r, x9r, x10r, x11r };
+
+   divi4_t res;
+
+   TEST_START("divi4");
+   /* Each check requires BOTH quotient and remainder vectors to match. */
+   res = divi4(x0n_v, x0d_v);
+   TEST_CHECK("20040928161846EJL", allequal_int4( res.quot, x0q_v ) && allequal_int4( res.rem, x0r_v ), 0);
+   res = divi4(x1n_v, x1d_v);
+   TEST_CHECK("20040928161851EJL", allequal_int4( res.quot, x1q_v ) && allequal_int4( res.rem, x1r_v ), 0);
+   res = divi4(x2n_v, x2d_v);
+   TEST_CHECK("20040928161855EJL", allequal_int4( res.quot, x2q_v ) && allequal_int4( res.rem, x2r_v ), 0);
+
+   TEST_SET_DONE();
+
+   TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/divu4.c b/Extras/simdmathlibrary/spu/tests/divu4.c
new file mode 100644
index 000000000..a5fff1d42
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/divu4.c
@@ -0,0 +1,146 @@
+/* Test divu4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+/* NOTE(review): the three system-include targets below were stripped during
+   patch transfer — presumably <stdio.h>, <stdlib.h>, <spu_intrinsics.h>. */
+#include
+#include
+#include
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+/* Spot-checks divu4(): element-wise unsigned 32-bit division of two
+   vec_uint4, returning a divu4_t with .quot and .rem vectors.  Sixteen
+   hand-computed cases, including divide-by-zero (x0: expected quot 0,
+   rem == numerator), x/1 (x15), max/2 (x13), 0/x (x14) and an exact
+   division (x12, remainder 0). */
+int main()
+{
+   TEST_SET_START("20060901150000MH","MH", "divu4");
+
+   unsigned int x0n = 0xffccb78d;
+   unsigned int x0d = 0x0 ;
+   unsigned int x0q = 0x0 ;
+   unsigned int x0r = 0xffccb78d;
+   unsigned int x1n = 0xff978333;
+   unsigned int x1d = 0xff976bb6;
+   unsigned int x1q = 0x1 ;
+   unsigned int x1r = 0x177d ;
+   unsigned int x2n = 0x5e146 ;
+   unsigned int x2d = 0xd14ebe0e;
+   unsigned int x2q = 0x0 ;
+   unsigned int x2r = 0x5e146 ;
+   unsigned int x3n = 0xf0e91618;
+   unsigned int x3d = 0xfddff7ac;
+   unsigned int x3q = 0x0 ;
+   unsigned int x3r = 0xf0e91618;
+
+   unsigned int x4n = 0xf2128d9d;
+   unsigned int x4d = 0xe0f76 ;
+   unsigned int x4q = 0x1137 ;
+   unsigned int x4r = 0x66543 ;
+   unsigned int x5n = 0xda1ba2ce;
+   unsigned int x5d = 0x4c9 ;
+   unsigned int x5q = 0x2d9482 ;
+   unsigned int x5r = 0xbc ;
+   unsigned int x6n = 0xdd4426a6;
+   unsigned int x6d = 0xf8d245cf;
+   unsigned int x6q = 0x0 ;
+   unsigned int x6r = 0xdd4426a6;
+   unsigned int x7n = 0xd1d5ae9 ;
+   unsigned int x7d = 0x333ab105;
+   unsigned int x7q = 0x0 ;
+   unsigned int x7r = 0xd1d5ae9 ;
+
+   unsigned int x8n = 0x3e0c6 ;
+   unsigned int x8d = 0xfff24255;
+   unsigned int x8q = 0x0 ;
+   unsigned int x8r = 0x3e0c6 ;
+   unsigned int x9n = 0xfd6fe27e;
+   unsigned int x9d = 0xf32454 ;
+   unsigned int x9q = 0x10a ;
+   unsigned int x9r = 0xcc2336 ;
+   unsigned int x10n =0xfb150f79;
+   unsigned int x10d =0xf521 ;
+   unsigned int x10q =0x10637 ;
+   unsigned int x10r =0x9f62 ;
+   unsigned int x11n =0xfe88071f;
+   unsigned int x11d =0xfff937c2;
+   unsigned int x11q =0x0 ;
+   unsigned int x11r =0xfe88071f;
+
+   unsigned int x12n =0xc374fa4 ;
+   unsigned int x12d =0x1234 ;
+   unsigned int x12q =0xabcd ;
+   unsigned int x12r =0x0 ;
+   unsigned int x13n =0xffffffff;
+   unsigned int x13d =0x2 ;
+   unsigned int x13q =0x7fffffff;
+   unsigned int x13r =0x1 ;
+   unsigned int x14n =0x0 ;
+   unsigned int x14d =0x12345678;
+   unsigned int x14q =0x0 ;
+   unsigned int x14r =0x0 ;
+   unsigned int x15n =0xffffffff;
+   unsigned int x15d =0x1 ;
+   unsigned int x15q =0xffffffff;
+   unsigned int x15r =0x0 ;
+
+   /* Pack the sixteen scalar cases into four 4-element test vectors. */
+   vec_uint4 x0n_v = (vec_uint4){ x0n, x1n, x2n, x3n };
+   vec_uint4 x1n_v = (vec_uint4){ x4n, x5n, x6n, x7n };
+   vec_uint4 x2n_v = (vec_uint4){ x8n, x9n, x10n, x11n };
+   vec_uint4 x3n_v = (vec_uint4){ x12n, x13n, x14n, x15n };
+
+   vec_uint4 x0d_v = (vec_uint4){ x0d, x1d, x2d, x3d };
+   vec_uint4 x1d_v = (vec_uint4){ x4d, x5d, x6d, x7d };
+   vec_uint4 x2d_v = (vec_uint4){ x8d, x9d, x10d, x11d };
+   vec_uint4 x3d_v = (vec_uint4){ x12d, x13d, x14d, x15d };
+
+   vec_uint4 x0q_v = (vec_uint4){ x0q, x1q, x2q, x3q };
+   vec_uint4 x1q_v = (vec_uint4){ x4q, x5q, x6q, x7q };
+   vec_uint4 x2q_v = (vec_uint4){ x8q, x9q, x10q, x11q };
+   vec_uint4 x3q_v = (vec_uint4){ x12q, x13q, x14q, x15q };
+
+   vec_uint4 x0r_v = (vec_uint4){ x0r, x1r, x2r, x3r };
+   vec_uint4 x1r_v = (vec_uint4){ x4r, x5r, x6r, x7r };
+   vec_uint4 x2r_v = (vec_uint4){ x8r, x9r, x10r, x11r };
+   vec_uint4 x3r_v = (vec_uint4){ x12r, x13r, x14r, x15r };
+
+   divu4_t res;
+
+   TEST_START("divu4");
+   /* Each check requires BOTH quotient and remainder vectors to match. */
+   res = divu4(x0n_v, x0d_v);
+   TEST_CHECK("20060901150001MH", allequal_uint4( res.quot, x0q_v ) && allequal_uint4( res.rem, x0r_v ), 0);
+   res = divu4(x1n_v, x1d_v);
+   TEST_CHECK("20060901150002MH", allequal_uint4( res.quot, x1q_v ) && allequal_uint4( res.rem, x1r_v ), 0);
+   res = divu4(x2n_v, x2d_v);
+   TEST_CHECK("20060901150003MH", allequal_uint4( res.quot, x2q_v ) && allequal_uint4( res.rem, x2r_v ), 0);
+   res = divu4(x3n_v, x3d_v);
+   TEST_CHECK("20060901150004MH", allequal_uint4( res.quot, x3q_v ) && allequal_uint4( res.rem, x3r_v ), 0);
+
+   TEST_SET_DONE();
+
+   TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/fabsd2.c b/Extras/simdmathlibrary/spu/tests/fabsd2.c
new file mode 100644
index 000000000..ac74c63e7
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/fabsd2.c
@@ -0,0 +1,103 @@
+/* Test fabsd2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer
Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+/* NOTE(review): the three system-include targets below were stripped during
+   patch transfer — presumably <stdio.h>, <math.h>, <spu_intrinsics.h>. */
+#include
+#include
+#include
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+/* Checks fabsd2() (vector double absolute value, both slots) and the scalar
+   fabs() against paired negative/positive values: +/-0, a large magnitude,
+   a tiny magnitude, and +/-infinity (x3 is built from 1.0/0.0).
+   hide_double / allequal_double2 / allposinf_double2 come from testutils.h
+   (not visible here); hide_double presumably defeats constant folding. */
+int main()
+{
+   TEST_SET_START("20040908022501EJL","EJL", "fabs");
+
+   double x0n = hide_double(-0.0);
+   double x0p = hide_double(0.0);
+   double x1n = hide_double(-83532.96153153);
+   double x1p = hide_double(83532.96153153);
+   double x2n = hide_double(-0.0000000013152);
+   double x2p = hide_double(0.0000000013152);
+   double x3n = hide_double(-1.0/0.0);
+   double x3p = hide_double(1.0/0.0);
+
+   vec_double2 x0n_v = spu_splats(x0n);
+   vec_double2 x0p_v = spu_splats(x0p);
+   vec_double2 x1n_v = spu_splats(x1n);
+   vec_double2 x1p_v = spu_splats(x1p);
+   vec_double2 x2n_v = spu_splats(x2n);
+   vec_double2 x2p_v = spu_splats(x2p);
+   vec_double2 x3n_v = spu_splats(x3n);
+   vec_double2 x3p_v = spu_splats(x3p);
+
+   double res;
+   vec_double2 res_v;
+
+   TEST_START("fabsd2");
+   res_v = fabsd2(x0n_v);
+   TEST_CHECK("20040908022502EJL", allequal_double2( res_v, x0p_v ), 0);
+   res_v = fabsd2(x0p_v);
+   TEST_CHECK("20040908022503EJL", allequal_double2( res_v, x0p_v ), 0);
+   res_v = fabsd2(x1n_v);
+   TEST_CHECK("20040908022504EJL", allequal_double2( res_v, x1p_v ), 0);
+   res_v = fabsd2(x1p_v);
+   TEST_CHECK("20040908022505EJL", allequal_double2( res_v, x1p_v ), 0);
+   res_v = fabsd2(x2n_v);
+   TEST_CHECK("20040908022506EJL", allequal_double2( res_v, x2p_v ), 0);
+   res_v = fabsd2(x2p_v);
+   TEST_CHECK("20040908022507EJL", allequal_double2( res_v, x2p_v ), 0);
+   /* Infinity inputs: result must be +Inf in both slots for either sign. */
+   res_v = fabsd2(x3n_v);
+   TEST_CHECK("20040908022508EJL", allposinf_double2( res_v ), 0);
+   res_v = fabsd2(x3p_v);
+   TEST_CHECK("20040908022509EJL", allposinf_double2( res_v ), 0);
+
+   TEST_START("fabs");
+   res = fabs( x0n );
+   TEST_CHECK("20040908022510EJL", res == x0p, 0);
+   res = fabs( x0p );
+   TEST_CHECK("20040908022511EJL", res == x0p, 0);
+   res = fabs( x1n );
+   TEST_CHECK("20040908022512EJL", res == x1p, 0);
+   res = fabs( x1p );
+   TEST_CHECK("20040908022513EJL", res == x1p, 0);
+   res = fabs( x2n );
+   TEST_CHECK("20040908022514EJL", res == x2p, 0);
+   res = fabs( x2p );
+   TEST_CHECK("20040908022515EJL", res == x2p, 0);
+   res = fabs( x3n );
+   TEST_CHECK("20040908022516EJL", isinf(res) == 1, 0);
+   res = fabs( x3p );
+   TEST_CHECK("20040908022517EJL", isinf(res) == 1, 0);
+
+   TEST_SET_DONE();
+
+   TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/fabsf4.c b/Extras/simdmathlibrary/spu/tests/fabsf4.c
new file mode 100644
index 000000000..4d62074e2
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/fabsf4.c
@@ -0,0 +1,106 @@
+/* Test fabsf4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* NOTE(review): the three system-include targets below were stripped during
+   patch transfer — presumably <stdio.h>, <math.h>, <spu_intrinsics.h>. */
+#include
+#include
+#include
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+/* Checks fabsf4() (vector float absolute value, all four slots) and the
+   scalar fabsf() against paired negative/positive values.  x3 is the
+   all-ones bit pattern 0xffffffff; the expected result is its sign bit
+   cleared (0x7fffffff) — i.e. fabsf4 is expected to operate purely on the
+   sign bit.  hide_float / make_float come from testutils.h (not visible). */
+int main()
+{
+   TEST_SET_START("20040915032605EJL","EJL", "fabsf");
+
+   unsigned int i3n = 0xffffffff;
+   unsigned int i3p = 0x7fffffff;
+
+   float x0n = hide_float(-0.0f);
+   float x0p = hide_float(0.0f);
+   float x1n = hide_float(-83532.96153153f);
+   float x1p = hide_float(83532.96153153f);
+   float x2n = hide_float(-0.0000000013152f);
+   float x2p = hide_float(0.0000000013152f);
+   float x3n = hide_float(make_float(i3n));
+   float x3p = hide_float(make_float(i3p));
+
+   vec_float4 x0n_v = spu_splats(x0n);
+   vec_float4 x0p_v = spu_splats(x0p);
+   vec_float4 x1n_v = spu_splats(x1n);
+   vec_float4 x1p_v = spu_splats(x1p);
+   vec_float4 x2n_v = spu_splats(x2n);
+   vec_float4 x2p_v = spu_splats(x2p);
+   vec_float4 x3n_v = spu_splats(x3n);
+   vec_float4 x3p_v = spu_splats(x3p);
+
+   float res;
+   vec_float4 res_v;
+
+   TEST_START("fabsf4");
+   res_v = fabsf4(x0n_v);
+   TEST_CHECK("20040915032618EJL", allequal_float4( res_v, x0p_v ), 0);
+   res_v = fabsf4(x0p_v);
+   TEST_CHECK("20040915032632EJL", allequal_float4( res_v, x0p_v ), 0);
+   res_v = fabsf4(x1n_v);
+   TEST_CHECK("20040915032643EJL", allequal_float4( res_v, x1p_v ), 0);
+   res_v = fabsf4(x1p_v);
+   TEST_CHECK("20040915032654EJL", allequal_float4( res_v, x1p_v ), 0);
+   res_v = fabsf4(x2n_v);
+   TEST_CHECK("20040915032704EJL", allequal_float4( res_v, x2p_v ), 0);
+   res_v = fabsf4(x2p_v);
+   TEST_CHECK("20040915032712EJL", allequal_float4( res_v, x2p_v ), 0);
+   res_v = fabsf4(x3n_v);
+   TEST_CHECK("20040915032719EJL", allequal_float4( res_v, x3p_v ), 0);
+   res_v = fabsf4(x3p_v);
+   TEST_CHECK("20040915032729EJL", allequal_float4( res_v, x3p_v ), 0);
+
+   TEST_START("fabsf");
+   res = fabsf( x0n );
+   TEST_CHECK("20040915032739EJL", res == x0p, 0);
+   res = fabsf( x0p );
+   TEST_CHECK("20040915032747EJL", res == x0p, 0);
+   res = fabsf( x1n );
+   TEST_CHECK("20040915032755EJL", res == x1p, 0);
+   res = fabsf( x1p );
+   TEST_CHECK("20040915032806EJL", res == x1p, 0);
+   res = fabsf( x2n );
+   TEST_CHECK("20040915032814EJL", res == x2p, 0);
+   res = fabsf( x2p );
+   TEST_CHECK("20040915032826EJL", res == x2p, 0);
+   res = fabsf( x3n );
+   TEST_CHECK("20040915032834EJL", res == x3p, 0);
+   res = fabsf( x3p );
+   TEST_CHECK("20040915032841EJL", res == x3p, 0);
+
+   TEST_SET_DONE();
+
+   TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/fdimd2.c b/Extras/simdmathlibrary/spu/tests/fdimd2.c
new file mode 100644
index 000000000..d1b7e6e7f
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/fdimd2.c
@@ -0,0 +1,173 @@
+/* Test fdimd2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ *
+ *@@ fdimd2 - compute positive difference.
+ *
+ *@brief
+ * boundary test for fdimd2.
+ *
+ *@pre
+ *
+ *@criteria
+ * when both of two values are denormalized, it may not work correctly.
+ *
+ *@note
+ * source of fdimd2.c was modified from IBM SDK1.1 math library
+ *
+ **/
+
+/* NOTE(review): the four system-include targets below were stripped during
+   patch transfer — presumably <stdio.h>, <stdlib.h>, <float.h> (for DBL_MAX)
+   and <spu_intrinsics.h>; restore before applying this patch. */
+#include
+#include
+#include
+#include
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+
+/* Boundary test of fdimd2() (vector double positive difference):
+   ordinary pairs, -0/+0, denormals, DBL_MAX vs +Inf, -Inf, NaN, and one
+   mixed-slot case (x9) where min/max are swapped between the two slots.
+   Expected: fdim(max,min) == max-min, fdim(min,max) == +0, NaN propagates. */
+int main()
+{
+   TEST_SET_START("20060824151500MH","MH", "fdimd2");
+
+   double x0min = hide_double(1760.135);
+   double x0max = hide_double(19355.03);
+   double x0dim = hide_double(19355.03 - 1760.135);
+
+   double x1min = hide_double(-12351.9);
+   double x1max = hide_double(-139.035);
+   double x1dim = hide_double((-139.035) - (-12351.9));
+
+   double x2min = hide_double(-1.0);
+   double x2max = hide_double(0.0);
+   double x2dim = hide_double(1.0);
+
+   double x3min = hide_double(nan(""));
+   double x3max = hide_double(-1.0);
+
+   double x4min = hide_double(-0.0);
+   double x4max = hide_double(0.0);
+
+   /* x5: adjacent denormals (see @criteria above). */
+   double x5min = hide_double(5.0e-324);
+   double x5max = hide_double(1.0e-323);
+   double x5dim = hide_double(1.0e-323 - 5.0e-324);
+
+   double x6min = hide_double(DBL_MAX);
+   double x6max = hide_double(1.0/0.0);
+
+   double x7min = hide_double(-1.0/0.0);
+   double x7max = hide_double(19355.03);
+
+   double x8min = hide_double(-1.0/0.0);
+   double x8max = hide_double(1.0/0.0);
+
+   vec_double2 x0min_v = spu_splats(x0min);
+   vec_double2 x0max_v = spu_splats(x0max);
+   vec_double2 x0dim_v = spu_splats(x0dim);
+
+   vec_double2 x1min_v = spu_splats(x1min);
+   vec_double2 x1max_v = spu_splats(x1max);
+   vec_double2 x1dim_v = spu_splats(x1dim);
+
+   vec_double2 x2min_v = spu_splats(x2min);
+   vec_double2 x2max_v = spu_splats(x2max);
+   vec_double2 x2dim_v = spu_splats(x2dim);
+
+   vec_double2 x3min_v = spu_splats(x3min);
+   vec_double2 x3max_v = spu_splats(x3max);
+
+   vec_double2 x4min_v = spu_splats(x4min);
+   vec_double2 x4max_v = spu_splats(x4max);
+
+   vec_double2 x5min_v = spu_splats(x5min);
+   vec_double2 x5max_v = spu_splats(x5max);
+   vec_double2 x5dim_v = spu_splats(x5dim);
+
+   vec_double2 x6min_v = spu_splats(x6min);
+   vec_double2 x6max_v = spu_splats(x6max);
+
+   vec_double2 x7min_v = spu_splats(x7min);
+   vec_double2 x7max_v = spu_splats(x7max);
+
+   vec_double2 x8min_v = spu_splats(x8min);
+   vec_double2 x8max_v = spu_splats(x8max);
+
+   /* x9: the two slots carry opposite orderings, so each direction of the
+      call must produce the difference in one slot and +0 in the other. */
+   vec_double2 x9minmax_v = (vec_double2){x0min, x1max};
+   vec_double2 x9maxmin_v = (vec_double2){x0max, x1min};
+   vec_double2 x9dim1_v = (vec_double2){x0dim, 0.0};
+   vec_double2 x9dim2_v = (vec_double2){0.0, x1dim};
+
+   vec_double2 res_v;
+
+   TEST_START("fdimd2");
+   res_v = fdimd2(x0min_v, x0max_v);
+   TEST_CHECK("20060824151501MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x0max_v, x0min_v);
+   TEST_CHECK("20060824151502MH", allequal_double2( res_v, x0dim_v ), 0);
+   res_v = fdimd2(x1min_v, x1max_v);
+   TEST_CHECK("20060824151503MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x1max_v, x1min_v);
+   TEST_CHECK("20060824151504MH", allequal_double2( res_v, x1dim_v ), 0);
+   res_v = fdimd2(x2min_v, x2max_v);
+   TEST_CHECK("20060824151505MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x2max_v, x2min_v);
+   TEST_CHECK("20060824151506MH", allequal_double2( res_v, x2dim_v ), 0);
+   res_v = fdimd2(x3min_v, x3max_v);
+   TEST_CHECK("20060824151507MH", allnan_double2( res_v ), 0);
+   res_v = fdimd2(x3max_v, x3min_v);
+   TEST_CHECK("20060824151508MH", allnan_double2( res_v ), 0);
+   res_v = fdimd2(x4min_v, x4max_v);
+   TEST_CHECK("20060824151509MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x4max_v, x4min_v);
+   TEST_CHECK("20060824151510MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x5min_v, x5max_v);
+   TEST_CHECK("20060824151511MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x5max_v, x5min_v);
+   TEST_CHECK("20060824151512MH", allequal_double2( res_v, x5dim_v ), 0);
+   res_v = fdimd2(x6min_v, x6max_v);
+   TEST_CHECK("20060824151513MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x6max_v, x6min_v);
+   TEST_CHECK("20060824151514MH", allposinf_double2( res_v ), 0);
+   res_v = fdimd2(x7min_v, x7max_v);
+   TEST_CHECK("20060824151515MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x7max_v, x7min_v);
+   TEST_CHECK("20060824151516MH", allposinf_double2( res_v ), 0);
+   res_v = fdimd2(x8min_v, x8max_v);
+   TEST_CHECK("20060824151517MH", allposzero_double2( res_v ), 0);
+   res_v = fdimd2(x8max_v, x8min_v);
+   TEST_CHECK("20060824151518MH", allposinf_double2( res_v ), 0);
+   res_v = fdimd2(x9minmax_v, x9maxmin_v);
+   TEST_CHECK("20060824151519MH", allequal_double2( res_v, x9dim2_v ), 0);
+   res_v = fdimd2(x9maxmin_v, x9minmax_v);
+   TEST_CHECK("20060824151520MH", allequal_double2( res_v, x9dim1_v ), 0);
+
+   TEST_SET_DONE();
+
+   TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h b/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h
new file mode 100644
index 000000000..3ed3a3491
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h
@@ -0,0 +1,173 @@
+/* Common part of testsuite for SPU SIMD Math library
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
/* (end of license header) floatingpoint_tests.h — bit-level float/double
   comparison helpers shared by the SIMD-math test suite.  Three variants are
   provided: PPU (AltiVec), SPU, and a plain-scalar fallback. */

#ifndef _FLOATINGPOINT_TESTS_H_
#define _FLOATINGPOINT_TESTS_H_

#if __PPC__
  /* NOTE(review): the include target was stripped in the patch transfer;
     the vec_* intrinsics used below are AltiVec, so this is presumably
     <altivec.h> — confirm against the original tree. */
  #include <altivec.h>
  #define vec_uchar16 vector unsigned char
  #define vec_char16 vector signed char
  #define vec_ushort8 vector unsigned short
  #define vec_short8 vector signed short
  #define vec_uint4 vector unsigned int
  #define vec_int4 vector signed int
  #define vec_ullong2 vector unsigned long long
  #define vec_llong2 vector signed long long
  #define vec_float4 vector float
  #define vec_double2 vector double
#else
  #if __SPU__
    /* NOTE(review): stripped include target — presumably <spu_intrinsics.h>. */
    #include <spu_intrinsics.h>
  #endif
#endif

// To avoid type punning warnings (for printing in hex notation, doing bit-diff etc)
typedef union {
  double d;
  unsigned char uc[8];
  unsigned int ui[2];
  unsigned long long int ull;
} sce_math_alt_double;

typedef union {
  float f;
  unsigned char uc[4];
  unsigned int ui;
} sce_math_alt_float;

#if (__PPC__ || __SPU__)
typedef union {
  vec_int4 vsi;
  int si[4];
} sce_math_alt_vec_int4;

typedef union {
  vec_uint4 vui;
  int ui[4];   /* NOTE(review): declared int although the vector is unsigned — confirm intent. */
} sce_math_alt_vec_uint4;

typedef union {
  vec_float4 vf;
  float sf[4];
  unsigned int ui[4];
} sce_math_alt_vec_float4;
#endif
#if __SPU__
  typedef union {
    double sd[2];
    vec_double2 vd;
    unsigned long long int ui[2];
  } sce_math_alt_vec_double2;
#endif

#if __PPC__
  /* Per-slot absolute difference of the raw bit patterns of two float vectors. */
  static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = vec_sub(refi, valsi);
    vec_int4 negdiff = vec_sub(((vec_int4)0), diff);

    return vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4)0) ));
  }
  /* Scalar bit-pattern difference of two floats. */
  static inline int bitdiff(float ref, float val) {
    sce_math_alt_float aref, aval;
    aref.f = ref;
    aval.f = val;
    int diff = aref.ui - aval.ui;
    return (diff>0)?diff:-diff;
  }
  /* Per-slot count of leading bits (out of 32) on which the patterns agree. */
  static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = vec_sub(refi, valsi);
    vec_int4 negdiff = vec_sub(((vec_int4)0), diff);

    diff = vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4)0) ));
    vec_float4 logdiff = vec_loge(vec_ctf(diff,0));
    return vec_sub(((vec_int4)32), vec_cts(vec_ceil(logdiff),0));
  }
  static inline int bitmatch(float ref, float val) {
    sce_math_alt_vec_float4 aref, aval;
    sce_math_alt_vec_int4 adiff;
    aref.sf[0] = ref;
    aval.sf[0] = val;
    adiff.vsi = bitmatch4(aref.vf, aval.vf);
    return adiff.si[0];
  }
#else
  #if __SPU__
    /* Per-slot absolute difference of the raw bit patterns of two float vectors. */
    static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) {
      vec_int4 refi = (vec_int4)ref;
      vec_int4 valsi = (vec_int4)vals;
      vec_int4 diff = spu_sub(refi, valsi);
      vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);

      return spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0));
    }
    static inline int bitdiff(float ref, float val) {
      return spu_extract(bitdiff4(spu_promote(ref,0), spu_promote(val,0)), 0);
    }
    /* Leading-zero count of |bit diff| == number of matching leading bits. */
    static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) {
      vec_int4 refi = (vec_int4)ref;
      vec_int4 valsi = (vec_int4)vals;
      vec_int4 diff = spu_sub(refi, valsi);
      vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);

      return (vec_int4)spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)));
    }
    static inline int bitmatch(float ref, float val) {
      return spu_extract(bitmatch4(spu_promote(ref,0), spu_promote(val,0)), 0);
    }

  #else
    /* Scalar fallback (host builds: neither PPU nor SPU).
       Made `static inline` to match the branches above and to avoid C99
       external-linkage problems for inline functions defined in a header. */

    /* Absolute difference of the raw bit patterns of two floats. */
    static inline int bitdiff(sce_math_alt_float ref, sce_math_alt_float val) {
      int diff = ref.ui - val.ui;
      return((diff>0)?diff:-diff);
    }
    /* Number of leading bits (out of 32) on which the two patterns agree,
       i.e. 32 - bit-length of |diff| — a count-leading-zeros, mirroring the
       SPU spu_cntlz variant.  BUG FIX: the original returned `udiff`, which
       is always 0 when the loop exits; the computed count `i` is returned. */
    static inline int bitmatch(sce_math_alt_float ref, sce_math_alt_float val) {
      int diff, i;
      unsigned int udiff;
      diff = ref.ui - val.ui;
      udiff = (diff>0) ? diff : -diff;
      i = 32;
      while(udiff != 0) {
        i = i-1;
        udiff = udiff >> 1;
      }
      return i;   /* was: return udiff; (always zero here) */
    }
  #endif // __SPU__
#endif // __PPC__


#endif // _FLOATINGPOINT_TESTS_H_

/* NOTE(review): trailing patch metadata from the mangled diff, preserved
   verbatim below (header of the next file in the patch):
diff --git a/Extras/simdmathlibrary/spu/tests/floord2.c b/Extras/simdmathlibrary/spu/tests/floord2.c
new file mode 100644
index 000000000..01ad0e1a7
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/floord2.c
@@ -0,0 +1,157 @@
+/* Test roundd2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. */
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ *
+ *@@ floord2 - for each of two double slots,
+ * round down to the largest integer not greater than the value.
+ *
+ *@brief
+ * boundary test for floord2.
+ *
+ *
+ *@pre
+ *
+ *@criteria
+ * Run this program and check that no error occurs.
+ *
+ *@note
+ *
+ *
+ **/
+/* NOTE(review): the system-include targets below were stripped during patch
+   transfer (one of them deliberately commented out in the original) —
+   presumably <stdio.h>, <stdlib.h>, <string.h> and <spu_intrinsics.h>. */
+#include
+#include
+#include
+//#include
+#include
+
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+
+/* One table entry: the same 2x2 doubles viewed either as doubles (dbl) or as
+   raw 64-bit patterns (ull).  xxx = input pair, ans = expected floord2()
+   output pair.  The raw view lets the table express NaN/Inf/denormal
+   patterns and lets the result check compare bits exactly. */
+typedef union {
+  struct {
+    double xxx[2];
+    double ans[2];
+  } dbl;
+  struct {
+    unsigned long long xxx[2];
+    unsigned long long ans[2];
+  } ull;
+} TestVec_Roundd2;
+
+/* Table-driven boundary test of floord2(); the list is terminated by an
+   all-zero input entry.  (The first entry, +0/-0, passes the sentinel test
+   because its second slot 0x8000000000000000 is nonzero.) */
+int main()
+{
+  TestVec_Roundd2 test_a[] = {
+    {
+      ull:{
+        // 0 -> 0 , -0 -> -0
+        {0x0000000000000000ULL,0x8000000000000000ULL},
+        {0x0000000000000000ULL,0x8000000000000000ULL}
+      }
+    },{
+      ull:{
+        // -Inf -> -Inf , Inf -> Inf
+        {0xFFF0000000000000ULL,0x7FF0000000000000ULL},
+        {0xFFF0000000000000ULL,0x7FF0000000000000ULL}
+      }
+    },{
+      ull:{
+        // MAX -> MAX , MIN -> MIN
+        {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL},
+        {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}
+      }
+    },{
+      ull:{
+        // +Denormalize -> 0, -Denormalize -> -1.0
+        {0x0000000000000001ULL,0x8000000000000010ULL},
+        {0x0000000000000000ULL,0xbff0000000000000ULL}
+      }
+    },{
+      ull:{
+        // +Denormalize -> 0, -Denormalize -> -1.0
+        {0x000FFFFFFFFFFFFFULL,0x800FFFFFFFFFFFFFULL},
+        {0x0000000000000000ULL,0xbff0000000000000ULL}
+      }
+    },{
+      ull:{
+        // border
+        {0x4320000000000001ULL,0xC320000000000001ULL},
+        {0x4320000000000000ULL,0xC320000000000002ULL}
+      }
+    },{
+      dbl:{
+        {1.0, -1.0},
+        {1.0, -1.0}
+      }
+    },{
+      dbl:{
+        {0.5, -0.5},
+        {0.0, -1.0}
+      }
+    },{
+      dbl:{
+        {-2.75, 3.25},
+        {-3.0, 3.0}
+      }
+    },{
+      ull:{
+        // Nan
+        {0xFFF0000000000001ULL,0x7FF0000000000001ULL},
+        {0xFFF0000000000001ULL,0x7FF0000000000001ULL}
+      }
+    },{
+      ull:{
+        {0ULL,0ULL},
+        {0ULL,0ULL}
+      }
+    }
+  };
+  int ii, test_ctr = 1;
+  char msg[80];
+  vec_double2 res_v;
+  //vec_double2 input;
+
+  TEST_SET_START("96743652190000NM","FLR", "floord2");
+
+  TEST_START("floord2");
+
+  /* Run until the all-zero sentinel entry; compare results bit-exactly by
+     reinterpreting both vectors as vec_llong2. */
+  for (ii=0; ; ii++) {
+    if ( (test_a[ii].ull.xxx[0] == 0) && (test_a[ii].ull.xxx[1] == 0) ) break;
+    //input = *((vec_double2 *)&test_a[ii].dbl.xxx[0]);
+    res_v = floord2 (*((vec_double2 *)&test_a[ii].dbl.xxx[0]) );
+    sprintf(msg,"9674365219%04dFLR", test_ctr++);
+    TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, (vec_llong2)*((vec_double2 *)&test_a[ii].dbl.ans[0])), 0);
+  }
+
+  TEST_SET_DONE();
+
+  TEST_EXIT();
+
+}
diff --git a/Extras/simdmathlibrary/spu/tests/floorf4.c b/Extras/simdmathlibrary/spu/tests/floorf4.c
new file mode 100644
index 000000000..fa514ed8b
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/floorf4.c
@@ -0,0 +1,109 @@
+/* Test floorf4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20040916145017EJL","EJL", "floorf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. 
+ + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(-1.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79616.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 x0i_v = spu_splats(x0i); + vec_float4 x1_v = spu_splats(x1); + vec_float4 x1i_v = spu_splats(x1i); + vec_float4 x2_v = spu_splats(x2); + vec_float4 x2i_v = spu_splats(x2i); + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3i_v = spu_splats(x3i); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4i_v = spu_splats(x4i); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5i_v = spu_splats(x5i); + + float res; + vec_float4 res_v; + + TEST_START("floorf4"); + res_v = floorf4(x0_v); + TEST_CHECK("20040916145022EJL", allequal_float4( res_v, x0i_v ), 0); + res_v = floorf4(x1_v); + TEST_CHECK("20040916145024EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = floorf4(x2_v); + TEST_CHECK("20040916145027EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = floorf4(x3_v); + TEST_CHECK("20040916145029EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = floorf4(x4_v); + TEST_CHECK("20040916145032EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = floorf4(x5_v); + TEST_CHECK("20040916145034EJL", allequal_float4( res_v, x5i_v ), 0); + + TEST_START("floorf"); + res = floorf(x0); + TEST_CHECK("20040916155814EJL", res == x0i, 0); + res = floorf(x1); + TEST_CHECK("20040916155818EJL", res == x1i, 0); + res = floorf(x2); + TEST_CHECK("20040916155822EJL", res == x2i, 0); + res = floorf(x3); + TEST_CHECK("20040916155825EJL", res == x3i, 0); + res = floorf(x4); + TEST_CHECK("20040916155827EJL", res == x4i, 0); + res = floorf(x5); + TEST_CHECK("20040916155830EJL", res == x5i, 0); + + 
TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fmad2.c b/Extras/simdmathlibrary/spu/tests/fmad2.c new file mode 100644 index 000000000..49a1ad3c7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fmad2.c @@ -0,0 +1,146 @@ +/* Test fmad2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * + *@@ fmad2 - multiply and add (double). + * + *@brief + * boundary test for fmad2. 
+ * + *@pre + * + *@criteria + * if input parameters are denorm, it may not work correctly. + * + *@note + * + **/ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060828114000MH","MH", "fmad2"); + +// double denorm_min = hide_double(make_double(0x0000000000000001ull)); + double denorm_max = hide_double(make_double(0x000fffffffffffffull)); +// double norm_min = hide_double(make_double(0x0010000000000000ull)); + double norm_max = hide_double(make_double(0x7fefffffffffffffull)); + + double x0 = hide_double(1760.135); + double y0 = hide_double(19355.03); + double z0 = hide_double(-12351.9); + double a0 = hide_double(34055113.82905); + + double x1 = hide_double(-139.035); + double y1 = hide_double(0.0); + double z1 = hide_double(-1.0); + + double x2 = hide_double(nan("")); + double y2 = hide_double(-1.0); + double z2 = hide_double(-0.0); + + double x3 = hide_double(1.0); + double y3 = hide_double(1.0/0.0); + double z3 = hide_double(-1.0); + + double x4 = norm_max; + double y4 = norm_max; + double z4 = hide_double(0.0); + + double x5 = hide_double(100.0); + double y5 = denorm_max; + double z5 = hide_double(0.0); + double a5 = hide_double(make_double(0x0078fffffffffffeull)); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_double2 z0_v = spu_splats(z0); + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_double2 z1_v = spu_splats(z1); + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_double2 z2_v = spu_splats(z2); + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_double2 z3_v = spu_splats(z3); + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_double2 z4_v = spu_splats(z4); + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_double2 z5_v = spu_splats(z5); + + vec_double2 a0_v = 
spu_splats(a0); + vec_double2 a1_v = spu_splats(z1); + vec_double2 a5_v = spu_splats(a5); + + vec_double2 res_v; + + TEST_START("fmad2"); + res_v = fmad2(x0_v, y0_v, z0_v); + TEST_CHECK("20060828114001MH", allequal_ulps_double2( res_v, a0_v, 1 ), 0); + res_v = fmad2(y0_v, x0_v, z0_v); + TEST_CHECK("20060828114002MH", allequal_ulps_double2( res_v, a0_v, 1 ), 0); + res_v = fmad2(x1_v, y1_v, z1_v); + TEST_CHECK("20060828114003MH", allequal_ulps_double2( res_v, a1_v, 1 ), 0); + res_v = fmad2(y1_v, x1_v, z1_v); + TEST_CHECK("20060828114004MH", allequal_ulps_double2( res_v, a1_v, 1 ), 0); + res_v = fmad2(x2_v, y2_v, z2_v); + TEST_CHECK("20060828114005MH", allnan_double2( res_v ), 0); + res_v = fmad2(y2_v, x2_v, z2_v); + TEST_CHECK("20060828114006MH", allnan_double2( res_v ), 0); + res_v = fmad2(x3_v, y3_v, z3_v); + TEST_CHECK("20060828114007MH", allposinf_double2( res_v ), 0); + res_v = fmad2(y3_v, x3_v, z3_v); + TEST_CHECK("20060828114008MH", allposinf_double2( res_v ), 0); + res_v = fmad2(x4_v, y4_v, z4_v); + TEST_CHECK("20060828114009MH", allposinf_double2( res_v ), 0); + res_v = fmad2(y4_v, x4_v, z4_v); + TEST_CHECK("20060828114010MH", allposinf_double2( res_v ), 0); + res_v = fmad2(x5_v, y5_v, z5_v); + TEST_CHECK("20060828114011MH", allequal_ulps_double2( res_v, a5_v, 1 ), 0); + res_v = fmad2(y5_v, x5_v, z5_v); + TEST_CHECK("20060828114012MH", allequal_ulps_double2( res_v, a5_v, 1 ), 0); +//printf("res:%.10le, a5:%.10le\n", spu_extract(res_v, 0), spu_extract(a5_v, 0)); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c b/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c new file mode 100644 index 000000000..18f6402b3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c @@ -0,0 +1,225 @@ +/* Test fmind2 and fmaxd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * + *@@ fmind2_fmaxd2 - find minimum/maximum value. + * + *@brief + * boundary test for fmind2/fmaxd2. 
+ * + *@pre + * + *@criteria + * + *@note + * + **/ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20060824103000MH","MH", "fmind2_fmaxd2"); + + double denorm_min = hide_double(make_double(0x0000000000000001ull)); + double denorm_max = hide_double(make_double(0x000fffffffffffffull)); + double norm_min = hide_double(make_double(0x0010000000000000ull)); + double norm_max = hide_double(make_double(0x7fefffffffffffffull)); + + double x0min = hide_double(1760.135); + double x0max = hide_double(19355.03); + + double x1min = hide_double(-12351.9); + double x1max = hide_double(-139.035); + + double x2min = hide_double(-1.0); + double x2max = hide_double(0.0); + + double x3min = hide_double(nan("")); + double x3max = hide_double(-1.0); + + double x4min = hide_double(-0.0); + double x4max = hide_double(0.0); + + double x5min = denorm_min; + double x5max = hide_double(1.0e-323); + + double x6min = norm_max; + double x6max = hide_double(1.0/0.0); + + double x7min = hide_double(-1.0/0.0); + double x7max = hide_double(19355.03); + + double x8min = hide_double(-1.0/0.0); + double x8max = hide_double(1.0/0.0); + + double x9min = denorm_max; + double x9max = norm_min; + + vec_double2 x0min_v = spu_splats(x0min); + vec_double2 x0max_v = spu_splats(x0max); + + vec_double2 x1min_v = spu_splats(x1min); + vec_double2 x1max_v = spu_splats(x1max); + + vec_double2 x2min_v = spu_splats(x2min); + vec_double2 x2max_v = spu_splats(x2max); + + vec_double2 x3min_v = spu_splats(x3min); + vec_double2 x3max_v = spu_splats(x3max); + + vec_double2 x4min_v = spu_splats(x4min); + vec_double2 x4max_v = spu_splats(x4max); + + vec_double2 x5min_v = spu_splats(x5min); + vec_double2 x5max_v = spu_splats(x5max); + + vec_double2 x6min_v = spu_splats(x6min); + vec_double2 x6max_v = spu_splats(x6max); + + vec_double2 x7min_v = spu_splats(x7min); + vec_double2 x7max_v = spu_splats(x7max); + + vec_double2 
x8min_v = spu_splats(x8min); + vec_double2 x8max_v = spu_splats(x8max); + + vec_double2 x9min_v = spu_splats(x9min); + vec_double2 x9max_v = spu_splats(x9max); + + vec_double2 x51min_v = (vec_double2){x5min, x1min}; + vec_double2 x51max_v = (vec_double2){x5max, x1max}; + + vec_double2 res_v; + + TEST_START("fmind2"); + res_v = fmind2(x0min_v, x0max_v); + TEST_CHECK("20060824103001MH", allequal_double2( res_v, x0min_v ), 0); + res_v = fmind2(x0max_v, x0min_v); + TEST_CHECK("20060824103002MH", allequal_double2( res_v, x0min_v ), 0); + res_v = fmind2(x1min_v, x1max_v); + TEST_CHECK("20060824103003MH", allequal_double2( res_v, x1min_v ), 0); + res_v = fmind2(x1max_v, x1min_v); + TEST_CHECK("20060824103004MH", allequal_double2( res_v, x1min_v ), 0); + res_v = fmind2(x2min_v, x2max_v); + TEST_CHECK("20060824103005MH", allequal_double2( res_v, x2min_v ), 0); + res_v = fmind2(x2max_v, x2min_v); + TEST_CHECK("20060824103006MH", allequal_double2( res_v, x2min_v ), 0); + res_v = fmind2(x3min_v, x3max_v); + TEST_CHECK("20060824103007MH", allequal_double2( res_v, x3max_v ), 0); + res_v = fmind2(x3max_v, x3min_v); + TEST_CHECK("20060824103008MH", allequal_double2( res_v, x3max_v ), 0); + res_v = fmind2(x4min_v, x4max_v); + TEST_CHECK("20060824103009MH", allequal_double2( res_v, x4min_v ), 0); + res_v = fmind2(x4max_v, x4min_v); + TEST_CHECK("20060824103010MH", allequal_double2( res_v, x4min_v ), 0); + res_v = fmind2(x5min_v, x5max_v); + TEST_CHECK("20060824103011MH", allequal_double2( res_v, x5min_v ), 0); + res_v = fmind2(x5max_v, x5min_v); + TEST_CHECK("20060824103012MH", allequal_double2( res_v, x5min_v ), 0); + res_v = fmind2(x6min_v, x6max_v); + TEST_CHECK("20060824103013MH", allequal_double2( res_v, x6min_v ), 0); + res_v = fmind2(x6max_v, x6min_v); + TEST_CHECK("20060824103014MH", allequal_double2( res_v, x6min_v ), 0); + res_v = fmind2(x7min_v, x7max_v); + TEST_CHECK("20060824103015MH", allequal_double2( res_v, x7min_v ), 0); + res_v = fmind2(x7max_v, x7min_v); + 
TEST_CHECK("20060824103016MH", allequal_double2( res_v, x7min_v ), 0); + res_v = fmind2(x8min_v, x8max_v); + TEST_CHECK("20060824103017MH", allequal_double2( res_v, x8min_v ), 0); + res_v = fmind2(x8max_v, x8min_v); + TEST_CHECK("20060824103018MH", allequal_double2( res_v, x8min_v ), 0); + res_v = fmind2(x9min_v, x9max_v); + TEST_CHECK("20060824103019MH", allequal_double2( res_v, x9min_v ), 0); + res_v = fmind2(x9max_v, x9min_v); + TEST_CHECK("20060824103020MH", allequal_double2( res_v, x9min_v ), 0); + res_v = fmind2(x51min_v, x51max_v); + TEST_CHECK("20060824103021MH", allequal_double2( res_v, x51min_v ), 0); + res_v = fmind2(x51max_v, x51min_v); + TEST_CHECK("20060824103022MH", allequal_double2( res_v, x51min_v ), 0); + + TEST_START("fmaxd2"); + res_v = fmaxd2(x0min_v, x0max_v); + TEST_CHECK("20060824103101MH", allequal_double2( res_v, x0max_v ), 0); + res_v = fmaxd2(x0max_v, x0min_v); + TEST_CHECK("20060824103102MH", allequal_double2( res_v, x0max_v ), 0); + res_v = fmaxd2(x1min_v, x1max_v); + TEST_CHECK("20060824103103MH", allequal_double2( res_v, x1max_v ), 0); + res_v = fmaxd2(x1max_v, x1min_v); + TEST_CHECK("20060824103104MH", allequal_double2( res_v, x1max_v ), 0); + res_v = fmaxd2(x2min_v, x2max_v); + TEST_CHECK("20060824103105MH", allequal_double2( res_v, x2max_v ), 0); + res_v = fmaxd2(x2max_v, x2min_v); + TEST_CHECK("20060824103106MH", allequal_double2( res_v, x2max_v ), 0); + res_v = fmaxd2(x3min_v, x3max_v); + TEST_CHECK("20060824103107MH", allequal_double2( res_v, x3max_v ), 0); + res_v = fmaxd2(x3max_v, x3min_v); + TEST_CHECK("20060824103108MH", allequal_double2( res_v, x3max_v ), 0); + res_v = fmaxd2(x4min_v, x4max_v); + TEST_CHECK("20060824103109MH", allequal_double2( res_v, x4max_v ), 0); + res_v = fmaxd2(x4max_v, x4min_v); + TEST_CHECK("20060824103110MH", allequal_double2( res_v, x4max_v ), 0); + res_v = fmaxd2(x5min_v, x5max_v); + TEST_CHECK("20060824103111MH", allequal_double2( res_v, x5max_v ), 0); + res_v = fmaxd2(x5max_v, x5min_v); + 
TEST_CHECK("20060824103112MH", allequal_double2( res_v, x5max_v ), 0); + res_v = fmaxd2(x6min_v, x6max_v); + TEST_CHECK("20060824103113MH", allequal_double2( res_v, x6max_v ), 0); + res_v = fmaxd2(x6max_v, x6min_v); + TEST_CHECK("20060824103114MH", allequal_double2( res_v, x6max_v ), 0); + res_v = fmaxd2(x7min_v, x7max_v); + TEST_CHECK("20060824103115MH", allequal_double2( res_v, x7max_v ), 0); + res_v = fmaxd2(x7max_v, x7min_v); + TEST_CHECK("20060824103116MH", allequal_double2( res_v, x7max_v ), 0); + res_v = fmaxd2(x8min_v, x8max_v); + TEST_CHECK("20060824103117MH", allequal_double2( res_v, x8max_v ), 0); + res_v = fmaxd2(x8max_v, x8min_v); + TEST_CHECK("20060824103118MH", allequal_double2( res_v, x8max_v ), 0); + res_v = fmaxd2(x9min_v, x9max_v); + TEST_CHECK("20060824103119MH", allequal_double2( res_v, x9max_v ), 0); + res_v = fmaxd2(x9max_v, x9min_v); + TEST_CHECK("20060824103120MH", allequal_double2( res_v, x9max_v ), 0); + res_v = fmaxd2(x51min_v, x51max_v); + TEST_CHECK("20060824103121MH", allequal_double2( res_v, x51max_v ), 0); + res_v = fmaxd2(x51max_v, x51min_v); + TEST_CHECK("20060824103122MH", allequal_double2( res_v, x51max_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fminf4_fmaxf4.c b/Extras/simdmathlibrary/spu/tests/fminf4_fmaxf4.c new file mode 100644 index 000000000..dd0bcd4a3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fminf4_fmaxf4.c @@ -0,0 +1,124 @@ +/* Test fminf4 and fmaxf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20040928184342EJL","EJL", "fminf4_fmaxf4"); + + float x0min = hide_float(1760.135f); + float x0max = hide_float(19355.03f); + + float x1min = hide_float(-12351.9f); + float x1max = hide_float(-139.035f); + + float x2min = hide_float(-1.0); + float x2max = hide_float(0.0); + + vec_float4 x0min_v = spu_splats(x0min); + vec_float4 x0max_v = spu_splats(x0max); + + vec_float4 x1min_v = spu_splats(x1min); + vec_float4 x1max_v = spu_splats(x1max); + + vec_float4 x2min_v = spu_splats(x2min); + vec_float4 x2max_v = spu_splats(x2max); + + float res; + vec_float4 res_v; + + TEST_START("fminf4"); + res_v = fminf4(x0min_v, x0max_v); + TEST_CHECK("20040928184345EJL", allequal_float4( res_v, x0min_v ), 0); + res_v = fminf4(x0max_v, x0min_v); + TEST_CHECK("20040928184349EJL", allequal_float4( res_v, x0min_v ), 0); + res_v = fminf4(x1min_v, x1max_v); + TEST_CHECK("20040928184351EJL", allequal_float4( res_v, x1min_v ), 0); + res_v = fminf4(x1max_v, x1min_v); + TEST_CHECK("20040928184353EJL", allequal_float4( res_v, x1min_v ), 0); + res_v = fminf4(x2min_v, x2max_v); + TEST_CHECK("20040928184354EJL", allequal_float4( res_v, x2min_v ), 0); + res_v = fminf4(x2max_v, x2min_v); + TEST_CHECK("20040928184356EJL", allequal_float4( res_v, x2min_v ), 0); + + TEST_START("fminf"); + res = fminf(x0min, x0max); + TEST_CHECK("20040928184358EJL", res == x0min, 0); + res = fminf(x0max, x0min); + TEST_CHECK("20040928184400EJL", res == x0min, 0); + res = fminf(x1min, x1max); + TEST_CHECK("20040928184401EJL", res == x1min, 0); + res = fminf(x1max, x1min); + TEST_CHECK("20040928184403EJL", res == x1min, 0); + res = fminf(x2min, x2max); + TEST_CHECK("20040928184405EJL", res == x2min, 0); + res = fminf(x2max, x2min); + TEST_CHECK("20040928184406EJL", res == x2min, 0); + + TEST_START("fmaxf4"); + res_v = fmaxf4(x0min_v, x0max_v); + 
TEST_CHECK("20040928184411EJL", allequal_float4( res_v, x0max_v ), 0); + res_v = fmaxf4(x0max_v, x0min_v); + TEST_CHECK("20040928184413EJL", allequal_float4( res_v, x0max_v ), 0); + res_v = fmaxf4(x1min_v, x1max_v); + TEST_CHECK("20040928184415EJL", allequal_float4( res_v, x1max_v ), 0); + res_v = fmaxf4(x1max_v, x1min_v); + TEST_CHECK("20040928184416EJL", allequal_float4( res_v, x1max_v ), 0); + res_v = fmaxf4(x2min_v, x2max_v); + TEST_CHECK("20040928184417EJL", allequal_float4( res_v, x2max_v ), 0); + res_v = fmaxf4(x2max_v, x2min_v); + TEST_CHECK("20040928184419EJL", allequal_float4( res_v, x2max_v ), 0); + + TEST_START("fmaxf"); + res = fmaxf(x0min, x0max); + TEST_CHECK("20040928184420EJL", res == x0max, 0); + res = fmaxf(x0max, x0min); + TEST_CHECK("20040928184422EJL", res == x0max, 0); + res = fmaxf(x1min, x1max); + TEST_CHECK("20040928184423EJL", res == x1max, 0); + res = fmaxf(x1max, x1min); + TEST_CHECK("20040928184424EJL", res == x1max, 0); + res = fmaxf(x2min, x2max); + TEST_CHECK("20040928184426EJL", res == x2max, 0); + res = fmaxf(x2max, x2min); + TEST_CHECK("20040928184428EJL", res == x2max, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fmodd2.c b/Extras/simdmathlibrary/spu/tests/fmodd2.c new file mode 100644 index 000000000..6e1b842e2 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fmodd2.c @@ -0,0 +1,364 @@ +/* Test roundd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ fmodd2 + + * + *@brief + * boundary test for fmodd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. 
+ * + *@note + * + * + **/ + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +typedef union { + struct { + double xxx[2]; + double yyy[2]; + double rrr[2]; + } dbl; + struct { + unsigned long long xxx[2]; + unsigned long long yyy[2]; + unsigned long long rrr[2]; + } ull; +} TestVec_Roundd2; + +int main() +{ + TestVec_Roundd2 test_a[] = { +{ +ull:{ +// 1.00...11 / 1.0...0 -> 0.00...11, 1.00...1 /1.0...0 -> 0.0...1 +{0x3ff0000000000003ULL,0x3ff0000000000001ULL}, +{0x3fe0000000000000ULL,0x3fe0000000000000ULL}, +{0x3cc8000000000000ULL,0x3cb0000000000000ULL} +} +},{ +ull:{ +// 1.00...1111 / 0.10...0 -> 0.00...11, 1.00...1 /0.10...0 -> 0.0...1 +{0x3ff000000000000fULL,0x3ff0000000000001ULL}, +{0x3fe0000000000000ULL,0x3fe0000000000000ULL}, +{0x3cee000000000000ULL,0x3cb0000000000000ULL} +} +}, + +{ +ull:{ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0x0000000000000000ULL, 0x7aa0000000000000ULL},/*(0.000000 4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0x7ff0000000000000ULL, 0x0000000000000001ULL},/*(inf 0.000000)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0xfff0000000000000ULL, 0x7fffffffeffffffeULL},/*(-inf nan)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x0000000000000000ULL, 0x7aa0000000000000ULL},/*(0.000000 
4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0x0000000000000001ULL},/*(inf 0.000000)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0xfff0000000000000ULL, 0x7fffffffeffffffeULL},/*(-inf nan)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL},/*(nan nan)*/ +{0x7fffffffeffffffeULL, 0x7fffffffeffffffeULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x0000000000000000ULL, 0x7aa0000000000000ULL},/*(0.000000 4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL},/*(0.000000 0.000000)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0x0000000000000001ULL},/*(inf 0.000000)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL},/*(0.000000 0.000000)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0xfff0000000000000ULL, 0x7fffffffeffffffeULL},/*(-inf nan)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL},/*(0.000000 0.000000)*/ +{0x0000000000000000ULL, 0x7fffffffeffffffeULL}/*(0.000000, nan)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0x7ff0000000000000ULL},/*(inf inf)*/ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x7ff8000000000000ULL, 0x7ff8000000000000ULL}/*(nan, nan)*/ +} 
+}, +{ +ull:{ +{0xfff0000000000000ULL, 0xfff0000000000000ULL},/*(-inf -inf)*/ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x7ff8000000000000ULL, 0x7ff8000000000000ULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x7aa0000000000000ULL, 0x7aa0000000000000ULL},/*(4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000 4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000)*/ +{0x7ff8000000000000ULL, 0x7ff8000000000000ULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x0000000000000001ULL, 0x0000000000000001ULL},/*(0.000000 0.000000)*/ +{0x7ff8000000000000ULL, 0x7ff8000000000000ULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x7aa0000000000000ULL, 0x0000000000000001ULL},/*(4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000 0.000000)*/ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x7aa0000000000000ULL, 0x0000000000000001ULL}/*(4646927838993072071243945306718439250410188130185607684549287060362314553626263667328650982800488595593121011844353040782670578892327830336846345565944983205571783876398312106070895030732180427719497486819848241268961714708993375275490934188589556509412640954544199727574266715045888.000000, 0.000000)*/ +} +}, +{ +ull:{ 
+{0x0000000000000000ULL, 0x0000000000000000ULL},/*(0.000000 0.000000)*/ +{0x7ff0000000000000ULL, 0xfff0000000000000ULL},/*(inf -inf)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x7fefffffffffffffULL, 0x7fefffffffffffffULL},/*(179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0x7feffffffffffffeULL, 0x7feeffffffffffffULL},/*(179769313486231550856124328384506240234343437157459335924404872448581845754556114388470639943126220321960804027157371570809852884964511743044087662767600909594331927728237078876188760579532563768698654064825262115771015791463983014857704008123419459386245141723703148097529108423358883457665451722744025579520.000000 174151522439786833602873345010488654505514389466650288559122786766978114661106633662692418716441642670880455394768401421807484146614013663315566064496053950599244663907024235661361045727850388502842959666202477512028391028080020983119748774627722497113443122745251401348739825745341808746724299140883367854080.000000)*/ +{0x7ca0000000000000ULL, 0x7fa0000000000000ULL}/*(19958403095347198116563727130368385660674512604354575415025472424372118918689640657849579654926357010893424468441924952439724379883935936607391717982848314203200056729510856765175377214443629871826533567445439239933308104551208703888888552684480441575071209068757560416423584952303440099278848.000000, 
5617791046444737211654078721215702292556178059194708039794690036179146118921905097897139916325235500660003558745981042426837180275450519452901482207483566386805246669527046414884444362538940441232908842252656430276192208823201965046059784704400851161354703458893321819998351435577491134526104885300757004288.000000)*/ +} +}, +{ +ull:{ +{0x0008000000000000ULL, 0x7fefffffffffffffULL},/*(0.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0x7ff0000000000000ULL, 0x7ff0000000000000ULL},/*(inf inf)*/ +{0x0008000000000000ULL, 0x7fefffffffffffffULL}/*(0.000000, 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +} +}, +{ +ull:{ +{0x7fefffffffffffffULL, 0x0010000000000000ULL},/*(179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 0.000000)*/ +{0x7fdfffffffffffffULL, 0x0008000000000003ULL},/*(89884656743115785407263711865852178399035283762922498299458738401578630390014269380294779316383439085770229476757191232117160663444732091384233773351768758493024955288275641038122745045194664472037934254227566971152291618451611474082904279666061674137398913102072361584369088590459649940625202013092062429184.000000 0.000000)*/ +{0x0000000000000000ULL, 0x0007fffffffffffdULL}/*(0.000000, 
0.000000)*/ +} +}, +{ +ull:{ +{0x7ff0000000000000ULL, 0x7fefffffffffffffULL},/*(inf 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0x7fefffffffffffffULL, 0x0000000000000003ULL},/*(179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 0.000000)*/ +{0x7ff8000000000000ULL, 0x0000000000000002ULL}/*(nan, 0.000000)*/ +} +}, +{ +ull:{ +{0xffe0000000000000ULL, 0x7fefffffffffffffULL},/*(-89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0x0008000000000000ULL, 0x0010000000000000ULL},/*(0.000000 0.000000)*/ +{0x8000000000000000ULL, 0x0000000000000000ULL}/*(-0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x4004000000000000ULL, 0x403d000000000000ULL},/*(2.500000 29.000000)*/ +{0x3ff8000000000000ULL, 0x4008000000000000ULL},/*(1.500000 3.000000)*/ +{0x3ff0000000000000ULL, 0x4000000000000000ULL}/*(1.000000, 2.000000)*/ +} +}, +{ +ull:{ +{0x8010000000000002ULL, 
0x9000000000000000ULL},/*(-0.000000 -0.000000)*/ +{0x8000000000000000ULL, 0x0000000000000000ULL},/*(-0.000000 0.000000)*/ +{0x0000000000000000ULL, 0x0000000000000000ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0xffefffffffffffffULL, 0x7fefffffffffffffULL},/*(-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0xffefffffffffffffULL, 0x7fefffffffffffffULL},/*(-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000)*/ +{0x8000000000000000ULL, 0x0000000000000000ULL}/*(-0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0xfff0000000000000ULL, 0x7ff0000000000000ULL},/*(-inf inf)*/ +{0xfff0000000000000ULL, 0x7ff0000000000000ULL},/*(-inf inf)*/ +{0x7ff8000000000000ULL, 0x7ff8000000000000ULL}/*(nan, nan)*/ +} +}, +{ +ull:{ +{0x800ffffffffffff3ULL, 0x09d0000000000006ULL},/*(-0.000000 0.000000)*/ +{0x8000000000000005ULL, 
0x8010000000000005ULL},/*(-0.000000 -0.000000)*/ +{0x8000000000000003ULL, 0x000fffffffffff88ULL}/*(-0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x1000000000000000ULL, 0x0000000000000001ULL},/*(0.000000 0.000000)*/ +{0x0010000000000005ULL, 0x8000000000000001ULL},/*(0.000000 -0.000000)*/ +{0x00058000000001eaULL, 0x0000000000000000ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x0010000000000002ULL, 0x1000000000000000ULL},/*(0.000000 0.000000)*/ +{0x800000000000000bULL, 0x0010000000000007ULL},/*(-0.000000 0.000000)*/ +{0x0000000000000006ULL, 0x000c800000000e62ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x0000000000000002ULL, 0x0000000000000003ULL},/*(0.000000 0.000000)*/ +{0x0000000000000003ULL, 0x0000000000000002ULL},/*(0.000000 0.000000)*/ +{0x0000000000000002ULL, 0x0000000000000001ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x806ffffffffffff3ULL, 0x0010000000000000ULL},/*(-0.000000 0.000000)*/ +{0x8000000000000005ULL, 0x0000000000000009ULL},/*(-0.000000 0.000000)*/ +{0x8000000000000003ULL, 0x0000000000000007ULL}/*(-0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x0000000000000000ULL, 0x8000000000000000ULL},/*(0.000000 -0.000000)*/ +{0x800ffffffffffff1ULL, 0x0000000000000003ULL},/*(-0.000000 0.000000)*/ +{0x0000000000000000ULL, 0x8000000000000000ULL}/*(0.000000, -0.000000)*/ +} +}, +{ +ull:{ +{0x1000000000000000ULL, 0x8000000000000000ULL},/*(0.000000 -0.000000)*/ +{0x0010000000000005ULL, 0x0000000000000001ULL},/*(0.000000 0.000000)*/ +{0x00058000000001eaULL, 0x8000000000000000ULL}/*(0.000000, -0.000000)*/ +} +}, +{ +ull:{ +{0x0fbfffffffffffffULL, 0x0f30000000000000ULL},/*(0.000000 0.000000)*/ +{0x000ffffffffffff1ULL, 0x0000000000000003ULL},/*(0.000000 0.000000)*/ +{0x000b7400000053f7ULL, 0x0000000000000001ULL}/*(0.000000, 0.000000)*/ +} +}, +{ +ull:{ +{0x0000000000000003ULL, 0x0000000000000003ULL},/*(0.000000 0.000000)*/ +{0x0000000000000006ULL, 0x8000000000000006ULL},/*(0.000000 -0.000000)*/ +{0x0000000000000003ULL, 0x0000000000000003ULL}/*(0.000000, 0.000000)*/ +} 
+}, +{ +ull:{ +{0x0020000000000001ULL, 0x8020000000000001ULL},/*(0.000000 -0.000000)*/ +{0x8010000000000002ULL, 0x0010000000000002ULL},/*(-0.000000 0.000000)*/ +{0x0010000000000000ULL, 0x8010000000000000ULL}/*(0.000000, -0.000000)*/ +} +}, + + + { + ull:{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + } + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + //vec_double2 input; + + TEST_SET_START("96743652190000NM","FLR", "floord2"); + + TEST_START("floord2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].ull.xxx[0] == 0) + && (test_a[ii].ull.xxx[1] == 0) + && (test_a[ii].ull.xxx[2] == 0) ) break; + + //input = *((vec_double2 *)&test_a[ii].dbl.xxx[0]); + res_v = fmodd2 (*((vec_double2 *)&test_a[ii].dbl.xxx[0]), + *((vec_double2 *)&test_a[ii].dbl.yyy[0]) ); + sprintf(msg,"9674365219%04dFLR", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, (vec_llong2)*((vec_double2 *)&test_a[ii].dbl.rrr[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/fmodf4.c b/Extras/simdmathlibrary/spu/tests/fmodf4.c new file mode 100644 index 000000000..48766f430 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fmodf4.c @@ -0,0 +1,145 @@ +/* Test fmodf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include /* NOTE(review): bracketed header names were stripped in extraction; presumably <stdio.h>, <stdlib.h>, <math.h> -- restore before applying this patch. */ +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ /* Checks the vector fmodf4 and scalar fmodf against six precomputed bit-exact cases. */ + TEST_SET_START("20040928191240EJL","EJL", "fmodf4"); + + unsigned int i0n = 0x449edbc6; /* i*n / i*d / i*r = numerator, denominator, expected remainder as raw IEEE-754 single-precision bits. */ + unsigned int i0d = 0x40cf799d; + unsigned int i0r = 0x3daa7300; + unsigned int i1n = 0x6bca107a; + unsigned int i1d = 0x6c4a107a; + unsigned int i1r = 0x6bca107a; /* |numerator| < |denominator|, so the expected result equals the numerator (also cases 2, 4, 5). */ + unsigned int i2n = 0x1c123605; + unsigned int i2d = 0x1c923602; + unsigned int i2r = 0x1c123605; + unsigned int i3n = 0x2b4c50fa; + unsigned int i3d = 0x253a3ae3; + unsigned int i3r = 0x25141df9; + unsigned int i4n = 0x73addffc; + unsigned int i4d = 0x742ddffc; + unsigned int i4r = 0x73addffc; + unsigned int i5n = 0x29d4d97c; + unsigned int i5d = 0x2a546e77; + unsigned int i5r = 0x29d4d97c; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); +
float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = spu_splats(x0n); /* Each scalar case is splatted across all four lanes for the vector test. */ + vec_float4 x0d_v = spu_splats(x0d); + vec_float4 x0r_v = spu_splats(x0r); + + vec_float4 x1n_v = spu_splats(x1n); + vec_float4 x1d_v = spu_splats(x1d); + vec_float4 x1r_v = spu_splats(x1r); + + vec_float4 x2n_v = spu_splats(x2n); + vec_float4 x2d_v = spu_splats(x2d); + vec_float4 x2r_v = spu_splats(x2r); + + vec_float4 x3n_v = spu_splats(x3n); + vec_float4 x3d_v = spu_splats(x3d); + vec_float4 x3r_v = spu_splats(x3r); + + vec_float4 x4n_v = spu_splats(x4n); + vec_float4 x4d_v = spu_splats(x4d); + vec_float4 x4r_v = spu_splats(x4r); + + vec_float4 x5n_v = spu_splats(x5n); + vec_float4 x5d_v = spu_splats(x5d); + vec_float4 x5r_v = spu_splats(x5r); + + float res; + vec_float4 res_v; + + TEST_START("fmodf4"); /* Vector form: every lane must match the reference within 1 ulp. */ + res_v = fmodf4(x0n_v, x0d_v); + TEST_CHECK("20040928191245EJL", allequal_ulps_float4( res_v, x0r_v, 1 ), 0); + res_v = fmodf4(x1n_v, x1d_v); + TEST_CHECK("20040928191247EJL", allequal_ulps_float4( res_v, x1r_v, 1 ), 0); + res_v = fmodf4(x2n_v, x2d_v); + TEST_CHECK("20040928191249EJL", allequal_ulps_float4( res_v, x2r_v, 1 ), 0); + res_v = fmodf4(x3n_v, x3d_v); + TEST_CHECK("20040928191251EJL", allequal_ulps_float4( res_v, x3r_v, 1 ), 0); + res_v = fmodf4(x4n_v, x4d_v); + TEST_CHECK("20040928191253EJL", allequal_ulps_float4( res_v, x4r_v, 1 ), 0); + res_v = fmodf4(x5n_v, x5d_v); + TEST_CHECK("20040928191255EJL", allequal_ulps_float4( res_v, x5r_v, 1 ), 0); + + 
TEST_START("fmodf"); /* Scalar cross-check of the same six cases, same 1-ulp tolerance. */ + res = fmodf(x0n, x0d); + TEST_CHECK("20040928191258EJL", ulpDiff_f( res, x0r ) <= 1, 0); + res = fmodf(x1n, x1d); + TEST_CHECK("20040928191300EJL", ulpDiff_f( res, x1r ) <= 1, 0); + res = fmodf(x2n, x2d); + TEST_CHECK("20040928191302EJL", ulpDiff_f( res, x2r ) <= 1, 0); + res = fmodf(x3n, x3d); + TEST_CHECK("20040928191303EJL", ulpDiff_f( res, x3r ) <= 1, 0); + res = fmodf(x4n, x4d); + TEST_CHECK("20040928191305EJL", ulpDiff_f( res, x4r ) <= 1, 0); + res = fmodf(x5n, x5d); + TEST_CHECK("20040928191307EJL", ulpDiff_f( res, x5r ) <= 1, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c b/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c new file mode 100644 index 000000000..5ca3d6300 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c @@ -0,0 +1,225 @@ +/* Test fpclassifyd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include /* NOTE(review): bracketed header names were stripped in extraction; presumably <stdio.h>, <stdlib.h>, <math.h>, <float.h> (DBL_MAX/DBL_MIN below need float.h) -- restore before applying this patch. */ +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ /* Checks fpclassifyd2 classification of double lanes; the expected classes below cover FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL and FP_ZERO. */ + TEST_SET_START("20060828000000AAN","AAN", "fpclassifyd2"); + + // -Nan + double x0 = hide_double(-nan("")); + long long r0 = FP_NAN; + + // -Inf + double x1 = hide_double(-1.0/0.0); + long long r1 = FP_INFINITE; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + long long r2 = FP_NORMAL; + + // -Norm + double x3 = hide_double(-824842.58421394); + long long r3 = FP_NORMAL; + + // -Dmin + double x4 = hide_double(-DBL_MIN); + long long r4 = FP_NORMAL; + + // -Denorm + double x5 = hide_double(-2.40e-310); + long long r5 = FP_SUBNORMAL; + + // -Unf + double x6 = hide_double(-1.0e-999); + long long r6 = FP_ZERO; /* the 1.0e-999 literal is below double range, so the constant underflows to zero */ + + // -0 + double x7 = hide_double(-0.0); + long long r7 = FP_ZERO; + + // 0 + double x8 = hide_double( 0.0); + long long r8 = FP_ZERO; + + // +Unf + double x9 = hide_double( 1.0e-999); + long long r9 = FP_ZERO; + + // +Denorm + double x10 = hide_double( 2.40e-310); + long long r10 = FP_SUBNORMAL; + + // +Dmin + double x11 = hide_double( DBL_MIN); + long long r11 = FP_NORMAL; + + // +Norm + double x12 = hide_double(3.14152634); + long long r12 = FP_NORMAL; + + // +Dmax + double x13 = hide_double(DBL_MAX); + long long r13 = FP_NORMAL; + + // +Inf + double x14 = hide_double( 1.0/0.0); + long long r14 = FP_INFINITE; + + //+Nan + double x15 = hide_double( nan("")); + long long r15 = FP_NAN; + + // Compound + 
vec_double2 x16_v = (vec_double2) {make_double(0x000AAAAAAAAAAAAAull), -1.0e-999 }; + vec_llong2 r16_v = (vec_llong2) {FP_SUBNORMAL, FP_ZERO}; + + // Compound + vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 }; + vec_llong2 r17_v = (vec_llong2) {FP_NORMAL, FP_SUBNORMAL}; + + // Compound + vec_double2 x18_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_llong2 r18_v = (vec_llong2) {FP_NAN, FP_NORMAL}; + + // Compound + vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_llong2 r19_v = (vec_llong2) {FP_INFINITE, FP_NAN}; + + // Compound + vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ; + vec_llong2 r20_v = (vec_llong2) {FP_ZERO, FP_INFINITE}; + + vec_double2 x0_v = spu_splats(x0); + vec_llong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_llong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_llong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_llong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_llong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_llong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_llong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_llong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_llong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_llong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_llong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_llong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_llong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_llong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_llong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_llong2 r15_v = spu_splats(r15); + + vec_llong2 res_v; + + TEST_START("fpclassifyd2"); /* Splatted scalar cases first, then the mixed-lane compounds. */ + + res_v = 
(vec_llong2)fpclassifyd2(x0_v); + TEST_CHECK("20060828000000AAN", allequal_llong2( res_v, r0_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x1_v); + TEST_CHECK("20060828000001AAN", allequal_llong2( res_v, r1_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x2_v); + TEST_CHECK("20060828000002AAN", allequal_llong2( res_v, r2_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x3_v); + TEST_CHECK("20060828000003AAN", allequal_llong2( res_v, r3_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x4_v); + TEST_CHECK("20060828000004AAN", allequal_llong2( res_v, r4_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x5_v); + TEST_CHECK("20060828000005AAN", allequal_llong2( res_v, r5_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x6_v); + TEST_CHECK("20060828000006AAN", allequal_llong2( res_v, r6_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x7_v); + TEST_CHECK("20060828000007AAN", allequal_llong2( res_v, r7_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x8_v); + TEST_CHECK("20060828000008AAN", allequal_llong2( res_v, r8_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x9_v); + TEST_CHECK("20060828000009AAN", allequal_llong2( res_v, r9_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x10_v); + TEST_CHECK("20060828000010AAN", allequal_llong2( res_v, r10_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x11_v); + TEST_CHECK("20060828000011AAN", allequal_llong2( res_v, r11_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x12_v); + TEST_CHECK("20060828000012AAN", allequal_llong2( res_v, r12_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x13_v); + TEST_CHECK("20060828000013AAN", allequal_llong2( res_v, r13_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x14_v); + TEST_CHECK("20060828000014AAN", allequal_llong2( res_v, r14_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x15_v); + TEST_CHECK("20060828000015AAN", allequal_llong2( res_v, r15_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x16_v); + TEST_CHECK("20060828000016AAN", allequal_llong2( res_v, r16_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x17_v); + TEST_CHECK("20060828000017AAN", 
allequal_llong2( res_v, r17_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x18_v); + TEST_CHECK("20060828000018AAN", allequal_llong2( res_v, r18_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x19_v); + TEST_CHECK("20060828000019AAN", allequal_llong2( res_v, r19_v ), 0); + res_v = (vec_llong2)fpclassifyd2(x20_v); + TEST_CHECK("20060828000020AAN", allequal_llong2( res_v, r20_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c b/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c new file mode 100644 index 000000000..c0691648f --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c @@ -0,0 +1,200 @@ +/* Test fpclassifyf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include /* NOTE(review): bracketed header names were stripped in extraction; presumably <stdio.h>, <stdlib.h>, <math.h> -- restore before applying this patch. */ +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ /* Checks fpclassifyf4 classification of float lanes; expected classes below are only FP_NORMAL, FP_SUBNORMAL and FP_ZERO. */ + TEST_SET_START("20060828000000AAN","AAN", "fpclassifyf4"); + + // -Nan + float x0 = hide_float(-nan("")); + int r0 = FP_NORMAL; /* NOTE(review): NaN/Inf bit patterns are expected to classify as FP_NORMAL here (also r1, r14, r15) -- presumably because SPU single-precision has no NaN/Inf encodings; confirm against the SPU ISA. */ + + // -Inf + float x1 = hide_float(-1.0/0.0); + int r1 = FP_NORMAL; + + // -Smax + float x2 = hide_float(make_float(0xFFFFFFFF)); + int r2 = FP_NORMAL; + + // -Norm + float x3 = hide_float(-824842.58421394f); + int r3 = FP_NORMAL; + + // -Smin + float x4 = hide_float(make_float(0x80800000)); + int r4 = FP_NORMAL; + + // -Denorm + float x5 = hide_float(make_float(0x803AAAAA)); + int r5 = FP_SUBNORMAL; + + // -Unf + float x6 = hide_float(-1.0e-999); + int r6 = FP_ZERO; + + // -0 + float x7 = hide_float(-0.0f); + int r7 = FP_ZERO; + + // 0 + float x8 = hide_float( 0.0f); + int r8 = FP_ZERO; + + // +Unf + float x9 = hide_float( 1.0e-999); + int r9 = FP_ZERO; + + // +Denorm + float x10 = hide_float(make_float(0x003AAAAA)); + int r10 = FP_SUBNORMAL; + + // +Smin + float x11 = hide_float(make_float(0x00800000)); + int r11 = FP_NORMAL; + + // +Norm + float x12 = hide_float(3.14152634f); + int r12 = FP_NORMAL; + + // +Smax + float x13 = hide_float(make_float(0x7FFFFFFF)); + int r13 = FP_NORMAL; + + // +Inf + float x14 = hide_float( 1.0/0.0); + int r14 = FP_NORMAL; + + //+Nan + float x15 = hide_float( nan("")); + int r15 = FP_NORMAL; + + // Compound + vec_float4 x16_v = (vec_float4) 
{make_float(0x003AAAAA), -1.0e-999, 345.27533, make_float(0x803AAAAA)}; + vec_int4 r16_v = (vec_int4) {FP_SUBNORMAL, FP_ZERO, FP_NORMAL, FP_SUBNORMAL}; + + vec_float4 x0_v = spu_splats(x0); /* Scalar cases splatted across all four lanes. */ + vec_int4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_int4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_int4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_int4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_int4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_int4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_int4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_int4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_int4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_int4 r9_v = spu_splats(r9); + + vec_float4 x10_v = spu_splats(x10); + vec_int4 r10_v = spu_splats(r10); + + vec_float4 x11_v = spu_splats(x11); + vec_int4 r11_v = spu_splats(r11); + + vec_float4 x12_v = spu_splats(x12); + vec_int4 r12_v = spu_splats(r12); + + vec_float4 x13_v = spu_splats(x13); + vec_int4 r13_v = spu_splats(r13); + + vec_float4 x14_v = spu_splats(x14); + vec_int4 r14_v = spu_splats(r14); + + vec_float4 x15_v = spu_splats(x15); + vec_int4 r15_v = spu_splats(r15); + + vec_int4 res_v; + + TEST_START("fpclassifyf4"); + + res_v = (vec_int4)fpclassifyf4(x0_v); + TEST_CHECK("20060828000000AAN", allequal_int4( res_v, r0_v ), 0); + res_v = (vec_int4)fpclassifyf4(x1_v); + TEST_CHECK("20060828000001AAN", allequal_int4( res_v, r1_v ), 0); + res_v = (vec_int4)fpclassifyf4(x2_v); + TEST_CHECK("20060828000002AAN", allequal_int4( res_v, r2_v ), 0); + res_v = (vec_int4)fpclassifyf4(x3_v); + TEST_CHECK("20060828000003AAN", allequal_int4( res_v, r3_v ), 0); + res_v = (vec_int4)fpclassifyf4(x4_v); + TEST_CHECK("20060828000004AAN", allequal_int4( res_v, r4_v ), 0); + res_v = (vec_int4)fpclassifyf4(x5_v); + TEST_CHECK("20060828000005AAN", 
allequal_int4( res_v, r5_v ), 0); + res_v = (vec_int4)fpclassifyf4(x6_v); + TEST_CHECK("20060828000006AAN", allequal_int4( res_v, r6_v ), 0); + res_v = (vec_int4)fpclassifyf4(x7_v); + TEST_CHECK("20060828000007AAN", allequal_int4( res_v, r7_v ), 0); + res_v = (vec_int4)fpclassifyf4(x8_v); + TEST_CHECK("20060828000008AAN", allequal_int4( res_v, r8_v ), 0); + res_v = (vec_int4)fpclassifyf4(x9_v); + TEST_CHECK("20060828000009AAN", allequal_int4( res_v, r9_v ), 0); + res_v = (vec_int4)fpclassifyf4(x10_v); + TEST_CHECK("20060828000010AAN", allequal_int4( res_v, r10_v ), 0); + res_v = (vec_int4)fpclassifyf4(x11_v); + TEST_CHECK("20060828000011AAN", allequal_int4( res_v, r11_v ), 0); + res_v = (vec_int4)fpclassifyf4(x12_v); + TEST_CHECK("20060828000012AAN", allequal_int4( res_v, r12_v ), 0); + res_v = (vec_int4)fpclassifyf4(x13_v); + TEST_CHECK("20060828000013AAN", allequal_int4( res_v, r13_v ), 0); + res_v = (vec_int4)fpclassifyf4(x14_v); + TEST_CHECK("20060828000014AAN", allequal_int4( res_v, r14_v ), 0); + res_v = (vec_int4)fpclassifyf4(x15_v); + TEST_CHECK("20060828000015AAN", allequal_int4( res_v, r15_v ), 0); + res_v = (vec_int4)fpclassifyf4(x16_v); + TEST_CHECK("20060828000016AAN", allequal_int4( res_v, r16_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/frexpd2.c b/Extras/simdmathlibrary/spu/tests/frexpd2.c new file mode 100644 index 000000000..f05e24b04 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/frexpd2.c @@ -0,0 +1,426 @@ +/* Test frexpd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#ifndef DBL_INF +#define DBL_INF ((long long)0x7FF0000000000000ull) +#endif + +#ifndef DBL_NAN +#define DBL_NAN ((long long)0x7FF8000000000000ull) +#endif + +int main() +{ + TEST_SET_START("20060907000000AAN","AAN", "frexpd2"); + + vec_double2 res_v; + vec_llong2 exp_v; + + // -Nan + double x0 = hide_double(-nan("")); + double r0 = hide_double( nan("")); + //long long e0 = 0; + + // -Inf + double x1 = hide_double(-1.0/0.0); + double r1 = x1; + //long long e1 = 0; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + double r2 = hide_double(make_double(0xBFEFFFFFFFFFFFFFull)); + long long e2 = 1024; + + // -Norm + double x3 = hide_double(-824842.58421394); + double r3 = hide_double(make_double(0xBFE92C152B1E16ECull)); + long long e3 = 20; + + // -Dmin + double x4 = hide_double(make_double(0x8000000000000001ull)); + double r4 = hide_double(make_double(0xBFE0000000000000ull)); + long long e4 = -1073; + + // -Denorm + double x5 = hide_double(-2.40e-310); + double r5 = hide_double(make_double(0xBFE6170DBAADCD80ull)); + long long e5 = -1028; + + // -Unf + double x6 = hide_double(-1.0e-999); + double r6 = hide_double(make_double(0x0000000000000000ull)); + long long e6 = 0; + + // -0 + double x7 = hide_double(-0.0); + double r7 = hide_double(make_double(0x0000000000000000ull)); + long long e7 = 0; + + // 0 + double x8 = hide_double( 0.0); + double r8 = hide_double(make_double(0x0000000000000000ull)); + long long e8 = 0; + + // +Unf + double x9 = hide_double( 1.0e-999); + double r9 = hide_double(make_double(0x0000000000000000ull)); + long long e9 = 0; + + // +Denorm + double x10 = hide_double( 2.40e-310); + double r10 = hide_double(make_double(0x3FE6170DBAADCD80ull)); + long long e10 = -1028; + + // +Dmin + double x11 = hide_double(make_double(0x0000000000000001ull)); + double r11 = hide_double(make_double(0x3FE0000000000000ull)); + long long e11 = 
-1073; + + // +Norm + double x12 = hide_double(3.14152634); + double r12 = hide_double(make_double(0x3FE921D88FCE94A3ull)); + long long e12 = 2; + + // +Norm + double x13 = hide_double(7.0673903e149); + double r13 = hide_double(make_double(0x3FEBA2C056BA0DB2ull)); + long long e13 = 498; + + // +Norm + double x14 = hide_double(2.4673e304); + double r14 = hide_double(make_double(0x3FE1FD485BDF688Eull)); + long long e14 = 1012; + + // +Norm + double x15 = hide_double(7.235672e-25); + double r15 = hide_double(make_double(0x3FEBFDDCCA6FF682ull)); + long long e15 = -80; + + // +Denorm + double x16 = hide_double(9.452854e-312); + double r16 = hide_double(make_double(0x3FEBD784FE999000ull)); + long long e16 = -1033; + + // +Demorm + double x17 = hide_double(3.045784e-320); + double r17 = hide_double(make_double(0x3FE8150000000000ull)); + long long e17 = -1061; + + // -Norm + double x18 = hide_double(-6.459273e7); + double r18 = hide_double(make_double(0xBFEECCDAD0000000ull)); + long long e18 = 26; + + // -Norm + double x19 = hide_double(-2.493472e-99); + double r19 = hide_double(make_double(0xBFE5D0BDA52F448Cull)); + long long e19 = -327; + + // -Norm + double x20 = hide_double(-1.4824543e128); + double r20 = hide_double(make_double(0xBFEB5FFBEAE7B3E1ull)); + long long e20 = 426; + + // -Denorm + double x21 = hide_double(-5.53856231e-315); + double r21 = hide_double(make_double(0xBFE0B457A5000000ull)); + long long e21 = -1043; + + // -Demorm + double x22 = hide_double(-2.5684367e-312); + double r22 = hide_double(make_double(0xBFEE427A82514000ull)); + long long e22 = -1035; + + // +Dmax + double x23 = hide_double(DBL_MAX); + double r23 = hide_double(make_double(0x3FEFFFFFFFFFFFFFull)); + long long e23 = 1024; + + // +Inf + double x24 = hide_double( 1.0/0.0 ); + double r24 = x24; + //long long e24 = 0; + + //+Nan + double x25 = hide_double( nan("")); + double r25 = hide_double( nan("")); + //long long e25 = 0; + + // Compound + vec_double2 x26_v = (vec_double2) { 
-2.561286432e-317, -1.0e-999 }; + vec_double2 r26_v = (vec_double2) { hide_double(make_double(0xBFE3C69940000000ull)), 0.0 }; + vec_llong2 e26_v = (vec_llong2) { -1051, 0 }; + + // Compound + vec_double2 x27_v = (vec_double2) { 345.27533, -8.673e-310 }; + vec_double2 r27_v = (vec_double2) { hide_double(make_double(0x3FE59467C06E19B9ull)), hide_double(make_double(0xBFE3F4FCCDB156C0ull)) }; + vec_llong2 e27_v = (vec_llong2) { 9, -1026 }; + + // Compound + vec_llong2 keep28_v = exp_v; + vec_double2 x28_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_double2 r28_v = (vec_double2) { nan(""), hide_double(make_double(0xBFEC1043703246A4ull)) }; + vec_llong2 e28_v = (vec_llong2) { spu_extract(exp_v, 0), 22 }; + + // Compound + vec_llong2 keep29_v = exp_v; + vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 r29_v = (vec_double2) { 1.0/0.0, nan("") }; + vec_llong2 e29_v = (vec_llong2) { spu_extract(exp_v, 0), spu_extract(exp_v, 1) }; + + // Compound + vec_llong2 keep30_v = exp_v; + vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ; + vec_double2 r30_v = (vec_double2) { hide_double(make_double(0xBFE4FF632B6A83E4ull)), -1.0/0.0 }; + vec_llong2 e30_v = (vec_llong2) { -328, spu_extract(exp_v, 1) }; + + vec_llong2 keep0_v = exp_v; + vec_double2 x0_v = spu_splats(x0); + vec_double2 r0_v = spu_splats(r0); + vec_llong2 e0_v = exp_v; + + vec_llong2 keep1_v = exp_v; + vec_double2 x1_v = spu_splats(x1); + vec_double2 r1_v = spu_splats(r1); + vec_llong2 e1_v = exp_v; + + vec_double2 x2_v = spu_splats(x2); + vec_double2 r2_v = spu_splats(r2); + vec_llong2 e2_v = spu_splats(e2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 r3_v = spu_splats(r3); + vec_llong2 e3_v = spu_splats(e3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 r4_v = spu_splats(r4); + vec_llong2 e4_v = spu_splats(e4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 r5_v = spu_splats(r5); + vec_llong2 e5_v = spu_splats(e5); + + vec_double2 x6_v = spu_splats(x6); + 
vec_double2 r6_v = spu_splats(r6); + vec_llong2 e6_v = spu_splats(e6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 r7_v = spu_splats(r7); + vec_llong2 e7_v = spu_splats(e7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 r8_v = spu_splats(r8); + vec_llong2 e8_v = spu_splats(e8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 r9_v = spu_splats(r9); + vec_llong2 e9_v = spu_splats(e9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 r10_v = spu_splats(r10); + vec_llong2 e10_v = spu_splats(e10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 r11_v = spu_splats(r11); + vec_llong2 e11_v = spu_splats(e11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 r12_v = spu_splats(r12); + vec_llong2 e12_v = spu_splats(e12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 r13_v = spu_splats(r13); + vec_llong2 e13_v = spu_splats(e13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 r14_v = spu_splats(r14); + vec_llong2 e14_v = spu_splats(e14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 r15_v = spu_splats(r15); + vec_llong2 e15_v = spu_splats(e15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 r16_v = spu_splats(r16); + vec_llong2 e16_v = spu_splats(e16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 r17_v = spu_splats(r17); + vec_llong2 e17_v = spu_splats(e17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 r18_v = spu_splats(r18); + vec_llong2 e18_v = spu_splats(e18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 r19_v = spu_splats(r19); + vec_llong2 e19_v = spu_splats(e19); + + vec_double2 x20_v = spu_splats(x20); + vec_double2 r20_v = spu_splats(r20); + vec_llong2 e20_v = spu_splats(e20); + + vec_double2 x21_v = spu_splats(x21); + vec_double2 r21_v = spu_splats(r21); + vec_llong2 e21_v = spu_splats(e21); + + vec_double2 x22_v = spu_splats(x22); + vec_double2 r22_v = spu_splats(r22); + vec_llong2 e22_v = spu_splats(e22); + + vec_double2 x23_v = spu_splats(x23); + vec_double2 
r23_v = spu_splats(r23); + vec_llong2 e23_v = spu_splats(e23); + + vec_llong2 keep24_v = exp_v; + vec_double2 x24_v = spu_splats(x24); + vec_double2 r24_v = spu_splats(r24); + vec_llong2 e24_v = exp_v; + + vec_llong2 keep25_v = exp_v; + vec_double2 x25_v = spu_splats(x25); + vec_double2 r25_v = spu_splats(r25); + vec_llong2 e25_v = exp_v; + + TEST_START("frexpd2"); + + exp_v = keep0_v; + res_v = (vec_double2)frexpd2(x0_v, &exp_v); + TEST_CHECK("20060907000000AAN", allnan_double2( res_v ), 0); (void)r0_v; + TEST_CHECK("20060907000000AAN", allequal_llong2( exp_v, e0_v ), 0); + exp_v = keep1_v; + res_v = (vec_double2)frexpd2(x1_v, &exp_v); + TEST_CHECK("20060907000001AAN", allequal_double2( res_v, r1_v ), 0); + TEST_CHECK("20060907000001AAN", allequal_llong2( exp_v, e1_v ), 0); + res_v = (vec_double2)frexpd2(x2_v, &exp_v); + TEST_CHECK("20060907000002AAN", allequal_double2( res_v, r2_v ), 0); + TEST_CHECK("20060907000002AAN", allequal_llong2( exp_v, e2_v ), 0); + res_v = (vec_double2)frexpd2(x3_v, &exp_v); + TEST_CHECK("20060907000003AAN", allequal_double2( res_v, r3_v ), 0); + TEST_CHECK("20060907000003AAN", allequal_llong2( exp_v, e3_v ), 0); + res_v = (vec_double2)frexpd2(x4_v, &exp_v); + TEST_CHECK("20060907000004AAN", allequal_double2( res_v, r4_v ), 0); + TEST_CHECK("20060907000004AAN", allequal_llong2( exp_v, e4_v ), 0); + res_v = (vec_double2)frexpd2(x5_v, &exp_v); + TEST_CHECK("20060907000005AAN", allequal_double2( res_v, r5_v ), 0); + TEST_CHECK("20060907000005AAN", allequal_llong2( exp_v, e5_v ), 0); + res_v = (vec_double2)frexpd2(x6_v, &exp_v); + TEST_CHECK("20060907000006AAN", allequal_double2( res_v, r6_v ), 0); + TEST_CHECK("20060907000006AAN", allequal_llong2( exp_v, e6_v ), 0); + res_v = (vec_double2)frexpd2(x7_v, &exp_v); + TEST_CHECK("20060907000007AAN", allequal_double2( res_v, r7_v ), 0); + TEST_CHECK("20060907000007AAN", allequal_llong2( exp_v, e7_v ), 0); + res_v = (vec_double2)frexpd2(x8_v, &exp_v); + TEST_CHECK("20060907000008AAN", 
allequal_double2( res_v, r8_v ), 0); + TEST_CHECK("20060907000008AAN", allequal_llong2( exp_v, e8_v ), 0); + res_v = (vec_double2)frexpd2(x9_v, &exp_v); + TEST_CHECK("20060907000009AAN", allequal_double2( res_v, r9_v ), 0); + TEST_CHECK("20060907000009AAN", allequal_llong2( exp_v, e9_v ), 0); + res_v = (vec_double2)frexpd2(x10_v, &exp_v); + TEST_CHECK("20060907000010AAN", allequal_double2( res_v, r10_v ), 0); + TEST_CHECK("20060907000010AAN", allequal_llong2( exp_v, e10_v ), 0); + res_v = (vec_double2)frexpd2(x11_v, &exp_v); + TEST_CHECK("20060907000011AAN", allequal_double2( res_v, r11_v ), 0); + TEST_CHECK("20060907000011AAN", allequal_llong2( exp_v, e11_v ), 0); + res_v = (vec_double2)frexpd2(x12_v, &exp_v); + TEST_CHECK("20060907000012AAN", allequal_double2( res_v, r12_v ), 0); + TEST_CHECK("20060907000012AAN", allequal_llong2( exp_v, e12_v ), 0); + res_v = (vec_double2)frexpd2(x13_v, &exp_v); + TEST_CHECK("20060907000013AAN", allequal_double2( res_v, r13_v ), 0); + TEST_CHECK("20060907000013AAN", allequal_llong2( exp_v, e13_v ), 0); + res_v = (vec_double2)frexpd2(x14_v, &exp_v); + TEST_CHECK("20060907000014AAN", allequal_double2( res_v, r14_v ), 0); + TEST_CHECK("20060907000014AAN", allequal_llong2( exp_v, e14_v ), 0); + res_v = (vec_double2)frexpd2(x15_v, &exp_v); + TEST_CHECK("20060907000015AAN", allequal_double2( res_v, r15_v ), 0); + TEST_CHECK("20060907000015AAN", allequal_llong2( exp_v, e15_v ), 0); + res_v = (vec_double2)frexpd2(x16_v, &exp_v); + TEST_CHECK("20060907000016AAN", allequal_double2( res_v, r16_v ), 0); + TEST_CHECK("20060907000016AAN", allequal_llong2( exp_v, e16_v ), 0); + res_v = (vec_double2)frexpd2(x17_v, &exp_v); + TEST_CHECK("20060907000017AAN", allequal_double2( res_v, r17_v ), 0); + TEST_CHECK("20060907000017AAN", allequal_llong2( exp_v, e17_v ), 0); + res_v = (vec_double2)frexpd2(x18_v, &exp_v); + TEST_CHECK("20060907000018AAN", allequal_double2( res_v, r18_v ), 0); + TEST_CHECK("20060907000018AAN", allequal_llong2( exp_v, e18_v ), 
0); + res_v = (vec_double2)frexpd2(x19_v, &exp_v); + TEST_CHECK("20060907000019AAN", allequal_double2( res_v, r19_v ), 0); + TEST_CHECK("20060907000019AAN", allequal_llong2( exp_v, e19_v ), 0); + res_v = (vec_double2)frexpd2(x20_v, &exp_v); + TEST_CHECK("20060907000020AAN", allequal_double2( res_v, r20_v ), 0); + TEST_CHECK("20060907000020AAN", allequal_llong2( exp_v, e20_v ), 0); + res_v = (vec_double2)frexpd2(x21_v, &exp_v); + TEST_CHECK("20060907000021AAN", allequal_double2( res_v, r21_v ), 0); + TEST_CHECK("20060907000021AAN", allequal_llong2( exp_v, e21_v ), 0); + res_v = (vec_double2)frexpd2(x22_v, &exp_v); + TEST_CHECK("20060907000022AAN", allequal_double2( res_v, r22_v ), 0); + TEST_CHECK("20060907000022AAN", allequal_llong2( exp_v, e22_v ), 0); + res_v = (vec_double2)frexpd2(x23_v, &exp_v); + TEST_CHECK("20060907000023AAN", allequal_double2( res_v, r23_v ), 0); + TEST_CHECK("20060907000023AAN", allequal_llong2( exp_v, e23_v ), 0); + exp_v = keep24_v; + res_v = (vec_double2)frexpd2(x24_v, &exp_v); + TEST_CHECK("20060907000024AAN", allequal_double2( res_v, r24_v ), 0); + TEST_CHECK("20060907000024AAN", allequal_llong2( exp_v, e24_v ), 0); + exp_v = keep25_v; + res_v = (vec_double2)frexpd2(x25_v, &exp_v); + TEST_CHECK("20060907000025AAN", allnan_double2( res_v ), 0); (void)r25_v; + TEST_CHECK("20060907000025AAN", allequal_llong2( exp_v, e25_v ), 0); + res_v = (vec_double2)frexpd2(x26_v, &exp_v); + TEST_CHECK("20060907000026AAN", allequal_double2( res_v, r26_v ), 0); + TEST_CHECK("20060907000026AAN", allequal_llong2( exp_v, e26_v ), 0); + res_v = (vec_double2)frexpd2(x27_v, &exp_v); + TEST_CHECK("20060907000027AAN", allequal_double2( res_v, r27_v ), 0); + TEST_CHECK("20060907000027AAN", allequal_llong2( exp_v, e27_v ), 0); + exp_v = keep28_v; + res_v = (vec_double2)frexpd2(x28_v, &exp_v); + TEST_CHECK("20060907000028AAN", allequal_ulps_double2( res_v, r28_v, 0 ), 0); + TEST_CHECK("20060907000028AAN", allequal_llong2( exp_v, e28_v ), 0); + exp_v = keep29_v; + 
res_v = (vec_double2)frexpd2(x29_v, &exp_v); + TEST_CHECK("20060907000029AAN", allequal_ulps_double2( res_v, r29_v, 0 ), 0); + TEST_CHECK("20060907000029AAN", allequal_llong2( exp_v, e29_v ), 0); + exp_v = keep30_v; + res_v = (vec_double2)frexpd2(x30_v, &exp_v); + TEST_CHECK("20060907000030AAN", allequal_ulps_double2( res_v, r30_v, 0 ), 0); + TEST_CHECK("20060907000030AAN", allequal_llong2( exp_v, e30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/frexpf4.c b/Extras/simdmathlibrary/spu/tests/frexpf4.c new file mode 100644 index 000000000..8654ba4f5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/frexpf4.c @@ -0,0 +1,405 @@ +/* Test frexpf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20060907000000AAN","AAN", "frexpf4"); + + // -Norm (IEEE-754: -Nan) + float x0 = hide_float(make_float(0xFFC00000)); + float r0 = hide_float(make_float(0xBF400000)); + int e0 = 129; + + // -Norm (IEEE-754: -Inf) + float x1 = hide_float(-1.0/0.0); + float r1 = hide_float(make_float(0xBF7FFFFF)); + int e1 = 129; + + // -Smax + float x2 = hide_float(make_float(0xFFFFFFFF)); + float r2 = hide_float(make_float(0xBF7FFFFF)); + int e2 = 129; + + // -Norm + float x3 = hide_float(-824842.58421394); + float r3 = hide_float(make_float(0xBF4960A9)); + int e3 = 20; + + // -Smin + float x4 = hide_float(make_float(0x80800000)); + float r4 = hide_float(make_float(0xBF000000)); + int e4 = -125; + + // -Denorm + float x5 = hide_float(make_float(0x807AAAAA)); + float r5 = 0.0; + int e5 = 0; + + // -Unf + float x6 = hide_float(-1.0e-999); + float r6 = 0.0; + int e6 = 0; + + // -0 + float x7 = hide_float(-0.0); + float r7 = 0.0; + int e7 = 0; + + // 0 + float x8 = hide_float( 0.0); + float r8 = 0.0; + int e8 = 0; + + // +Unf + float x9 = hide_float( 1.0e-999); + float r9 = 0.0; + int e9 = 0; + + // +Denorm + float x10 = hide_float(make_float(0x007AAAAA)); + float r10 = 0.0; + int e10 = 0; + + // +Smin + float x11 = hide_float(make_float(0x00800000)); + float r11 = hide_float(make_float(0x3F000000)); + int e11 = 
-125; + + // +Norm + float x12 = hide_float(3.14152634); + float r12 = hide_float(make_float(0x3F490EC4)); + int e12 = 2; + + // +Norm + float x13 = hide_float(7.0673903e37); + float r13 = hide_float(make_float(0x3F54AD32)); + int e13 = 126; + + // +Norm + float x14 = hide_float(2.4673e14); + float r14 = hide_float(make_float(0x3F60664E)); + int e14 = 48; + + // +Norm + float x15 = hide_float(7.235672e-25); + float r15 = hide_float(make_float(0x3F5FEEE6)); + int e15 = -80; + + // +Norm + float x16 = hide_float(9.452854e17); + float r16 = hide_float(make_float(0x3F51E541)); + int e16 = 60; + + // +Norm + float x17 = hide_float(3.045784e-18); + float r17 = hide_float(make_float(0x3F60BD3C)); + int e17 = -58; + + // -Norm + float x18 = hide_float(-6.459273e7); + float r18 = hide_float(make_float(0xBF7666D6)); + int e18 = 26; + + // -Norm + float x19 = hide_float(-2.493472e-9); + float r19 = hide_float(make_float(0xBF2B59A0)); + int e19 = -28; + + // -Norm + float x20 = hide_float(-1.4824543e28); + float r20 = hide_float(make_float(0xBF3F9A4C)); + int e20 = 94; + + // -Norm + float x21 = hide_float(-5.53856231e-27); + float r21 = hide_float(make_float(0xBF5B67B2)); + int e21 = -87; + + // -Norm + float x22 = hide_float(-1.000001); + float r22 = hide_float(make_float(0xBF000008)); + int e22 = 1; + + // +Smax + float x23 = hide_float(make_float(0x7FFFFFFF)); + float r23 = hide_float(make_float(0x3F7FFFFF)); + int e23 = 129; + + //+Norm (IEEE-754: +Inf) + float x24 = hide_float( 1.0/0.0); + float r24 = hide_float(make_float(0x3F7FFFFF)); + int e24 = 129; + + //+Norm (IEEE-754: +Nan) + float x25 = hide_float(make_float(0x7FC00000)); + float r25 = hide_float(make_float(0x3F400000)); + int e25 = 129; + + // Compound + vec_float4 x26_v = (vec_float4) { -2.561286432e10, make_float(0x7FFFFFFF), -1.0e-999, 7.235672e-25 }; + vec_float4 r26_v = (vec_float4) { make_float(0xBF3ED4A9), make_float(0x3F7FFFFF), 0.0, make_float(0x3F5FEEE6) }; + vec_int4 e26_v = (vec_int4) { 35, 129, 0, 
-80 }; + + // Compound + vec_float4 x27_v = (vec_float4) { 345.27533f, 7.0673903e37, -0.0f, -2.40e-310 }; + vec_float4 r27_v = (vec_float4) { make_float(0x3F2CA33E), make_float(0x3F54AD32), 0.0, 0.0}; + vec_int4 e27_v = (vec_int4) { 9, 126, 0, 0 }; + + // Compound + vec_float4 x28_v = (vec_float4) { make_float(0x7FC00000), -824842.58421394f, -0.0f, -3678342.8765343f }; + vec_float4 r28_v = (vec_float4) { make_float(0x3F400000), make_float(0xBF4960A9), 0.0, make_float(0xBF60821B) }; + vec_int4 e28_v = (vec_int4) { 129, 20, 0, 22 }; + + // Compound + vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; + vec_float4 r29_v = (vec_float4) { make_float(0x3F7FFFFF), 0.0, make_float(0xBF5B67B2), make_float(0xBF400000) }; + vec_int4 e29_v = (vec_int4) { 129, 0, -87, 129 }; + + // Compound + vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ; + vec_float4 r30_v = (vec_float4) { 0.0, make_float(0xBF0DABC6 ), make_float(0x3F60BD3C), make_float(0xBF7FFFFF) }; + vec_int4 e30_v = (vec_int4) { 0, -62, -58, 129 }; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 r0_v = spu_splats(r0); + vec_int4 e0_v = spu_splats(e0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 r1_v = spu_splats(r1); + vec_int4 e1_v = spu_splats(e1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 r2_v = spu_splats(r2); + vec_int4 e2_v = spu_splats(e2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 r3_v = spu_splats(r3); + vec_int4 e3_v = spu_splats(e3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 r4_v = spu_splats(r4); + vec_int4 e4_v = spu_splats(e4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 r5_v = spu_splats(r5); + vec_int4 e5_v = spu_splats(e5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 r6_v = spu_splats(r6); + vec_int4 e6_v = spu_splats(e6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 r7_v = spu_splats(r7); + vec_int4 e7_v = spu_splats(e7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 
r8_v = spu_splats(r8); + vec_int4 e8_v = spu_splats(e8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 r9_v = spu_splats(r9); + vec_int4 e9_v = spu_splats(e9); + + vec_float4 x10_v = spu_splats(x10); + vec_float4 r10_v = spu_splats(r10); + vec_int4 e10_v = spu_splats(e10); + + vec_float4 x11_v = spu_splats(x11); + vec_float4 r11_v = spu_splats(r11); + vec_int4 e11_v = spu_splats(e11); + + vec_float4 x12_v = spu_splats(x12); + vec_float4 r12_v = spu_splats(r12); + vec_int4 e12_v = spu_splats(e12); + + vec_float4 x13_v = spu_splats(x13); + vec_float4 r13_v = spu_splats(r13); + vec_int4 e13_v = spu_splats(e13); + + vec_float4 x14_v = spu_splats(x14); + vec_float4 r14_v = spu_splats(r14); + vec_int4 e14_v = spu_splats(e14); + + vec_float4 x15_v = spu_splats(x15); + vec_float4 r15_v = spu_splats(r15); + vec_int4 e15_v = spu_splats(e15); + + vec_float4 x16_v = spu_splats(x16); + vec_float4 r16_v = spu_splats(r16); + vec_int4 e16_v = spu_splats(e16); + + vec_float4 x17_v = spu_splats(x17); + vec_float4 r17_v = spu_splats(r17); + vec_int4 e17_v = spu_splats(e17); + + vec_float4 x18_v = spu_splats(x18); + vec_float4 r18_v = spu_splats(r18); + vec_int4 e18_v = spu_splats(e18); + + vec_float4 x19_v = spu_splats(x19); + vec_float4 r19_v = spu_splats(r19); + vec_int4 e19_v = spu_splats(e19); + + vec_float4 x20_v = spu_splats(x20); + vec_float4 r20_v = spu_splats(r20); + vec_int4 e20_v = spu_splats(e20); + + vec_float4 x21_v = spu_splats(x21); + vec_float4 r21_v = spu_splats(r21); + vec_int4 e21_v = spu_splats(e21); + + vec_float4 x22_v = spu_splats(x22); + vec_float4 r22_v = spu_splats(r22); + vec_int4 e22_v = spu_splats(e22); + + vec_float4 x23_v = spu_splats(x23); + vec_float4 r23_v = spu_splats(r23); + vec_int4 e23_v = spu_splats(e23); + + vec_float4 x24_v = spu_splats(x24); + vec_float4 r24_v = spu_splats(r24); + vec_int4 e24_v = spu_splats(e24); + + vec_float4 x25_v = spu_splats(x25); + vec_float4 r25_v = spu_splats(r25); + vec_int4 e25_v = spu_splats(e25); + + 
vec_float4 res_v; + vec_int4 exp_v; + + TEST_START("frexpf4"); + + res_v = (vec_float4)frexpf4(x0_v, &exp_v); + TEST_CHECK("20060907000000AAN", allequal_ulps_float4( res_v, r0_v, 0 ), 0); + TEST_CHECK("20060907000000AAN", allequal_int4( exp_v, e0_v ), 0); + res_v = (vec_float4)frexpf4(x1_v, &exp_v); + TEST_CHECK("20060907000001AAN", allequal_ulps_float4( res_v, r1_v, 0 ), 0); + TEST_CHECK("20060907000001AAN", allequal_int4( exp_v, e1_v ), 0); + res_v = (vec_float4)frexpf4(x2_v, &exp_v); + TEST_CHECK("20060907000002AAN", allequal_ulps_float4( res_v, r2_v, 0 ), 0); + TEST_CHECK("20060907000002AAN", allequal_int4( exp_v, e2_v ), 0); + res_v = (vec_float4)frexpf4(x3_v, &exp_v); + TEST_CHECK("20060907000003AAN", allequal_float4( res_v, r3_v ), 0); + TEST_CHECK("20060907000003AAN", allequal_int4( exp_v, e3_v ), 0); + res_v = (vec_float4)frexpf4(x4_v, &exp_v); + TEST_CHECK("20060907000004AAN", allequal_float4( res_v, r4_v ), 0); + TEST_CHECK("20060907000004AAN", allequal_int4( exp_v, e4_v ), 0); + res_v = (vec_float4)frexpf4(x5_v, &exp_v); + TEST_CHECK("20060907000005AAN", allequal_float4( res_v, r5_v ), 0); + TEST_CHECK("20060907000005AAN", allequal_int4( exp_v, e5_v ), 0); + res_v = (vec_float4)frexpf4(x6_v, &exp_v); + TEST_CHECK("20060907000006AAN", allequal_float4( res_v, r6_v ), 0); + TEST_CHECK("20060907000006AAN", allequal_int4( exp_v, e6_v ), 0); + res_v = (vec_float4)frexpf4(x7_v, &exp_v); + TEST_CHECK("20060907000007AAN", allequal_float4( res_v, r7_v ), 0); + TEST_CHECK("20060907000007AAN", allequal_int4( exp_v, e7_v ), 0); + res_v = (vec_float4)frexpf4(x8_v, &exp_v); + TEST_CHECK("20060907000008AAN", allequal_float4( res_v, r8_v ), 0); + TEST_CHECK("20060907000008AAN", allequal_int4( exp_v, e8_v ), 0); + res_v = (vec_float4)frexpf4(x9_v, &exp_v); + TEST_CHECK("20060907000009AAN", allequal_float4( res_v, r9_v ), 0); + TEST_CHECK("20060907000009AAN", allequal_int4( exp_v, e9_v ), 0); + res_v = (vec_float4)frexpf4(x10_v, &exp_v); + TEST_CHECK("20060907000010AAN", 
allequal_float4( res_v, r10_v ), 0); + TEST_CHECK("20060907000010AAN", allequal_int4( exp_v, e10_v ), 0); + res_v = (vec_float4)frexpf4(x11_v, &exp_v); + TEST_CHECK("20060907000011AAN", allequal_float4( res_v, r11_v ), 0); + TEST_CHECK("20060907000011AAN", allequal_int4( exp_v, e11_v ), 0); + res_v = (vec_float4)frexpf4(x12_v, &exp_v); + TEST_CHECK("20060907000012AAN", allequal_float4( res_v, r12_v ), 0); + TEST_CHECK("20060907000012AAN", allequal_int4( exp_v, e12_v ), 0); + res_v = (vec_float4)frexpf4(x13_v, &exp_v); + TEST_CHECK("20060907000013AAN", allequal_float4( res_v, r13_v ), 0); + TEST_CHECK("20060907000013AAN", allequal_int4( exp_v, e13_v ), 0); + res_v = (vec_float4)frexpf4(x14_v, &exp_v); + TEST_CHECK("20060907000014AAN", allequal_float4( res_v, r14_v ), 0); + TEST_CHECK("20060907000014AAN", allequal_int4( exp_v, e14_v ), 0); + res_v = (vec_float4)frexpf4(x15_v, &exp_v); + TEST_CHECK("20060907000015AAN", allequal_float4( res_v, r15_v ), 0); + TEST_CHECK("20060907000015AAN", allequal_int4( exp_v, e15_v ), 0); + res_v = (vec_float4)frexpf4(x16_v, &exp_v); + TEST_CHECK("20060907000016AAN", allequal_float4( res_v, r16_v ), 0); + TEST_CHECK("20060907000016AAN", allequal_int4( exp_v, e16_v ), 0); + res_v = (vec_float4)frexpf4(x17_v, &exp_v); + TEST_CHECK("20060907000017AAN", allequal_float4( res_v, r17_v ), 0); + TEST_CHECK("20060907000017AAN", allequal_int4( exp_v, e17_v ), 0); + res_v = (vec_float4)frexpf4(x18_v, &exp_v); + TEST_CHECK("20060907000018AAN", allequal_float4( res_v, r18_v ), 0); + TEST_CHECK("20060907000018AAN", allequal_int4( exp_v, e18_v ), 0); + res_v = (vec_float4)frexpf4(x19_v, &exp_v); + TEST_CHECK("20060907000019AAN", allequal_float4( res_v, r19_v ), 0); + TEST_CHECK("20060907000019AAN", allequal_int4( exp_v, e19_v ), 0); + res_v = (vec_float4)frexpf4(x20_v, &exp_v); + TEST_CHECK("20060907000020AAN", allequal_float4( res_v, r20_v ), 0); + TEST_CHECK("20060907000020AAN", allequal_int4( exp_v, e20_v ), 0); + res_v = 
(vec_float4)frexpf4(x21_v, &exp_v); + TEST_CHECK("20060907000021AAN", allequal_float4( res_v, r21_v ), 0); + TEST_CHECK("20060907000021AAN", allequal_int4( exp_v, e21_v ), 0); + res_v = (vec_float4)frexpf4(x22_v, &exp_v); + TEST_CHECK("20060907000022AAN", allequal_float4( res_v, r22_v ), 0); + TEST_CHECK("20060907000022AAN", allequal_int4( exp_v, e22_v ), 0); + res_v = (vec_float4)frexpf4(x23_v, &exp_v); + TEST_CHECK("20060907000023AAN", allequal_float4( res_v, r23_v ), 0); + TEST_CHECK("20060907000023AAN", allequal_int4( exp_v, e23_v ), 0); + res_v = (vec_float4)frexpf4(x24_v, &exp_v); + TEST_CHECK("20060907000024AAN", allequal_float4( res_v, r24_v ), 0); + TEST_CHECK("20060907000024AAN", allequal_int4( exp_v, e24_v ), 0); + res_v = (vec_float4)frexpf4(x25_v, &exp_v); + TEST_CHECK("20060907000025AAN", allequal_float4( res_v, r25_v ), 0); + TEST_CHECK("20060907000025AAN", allequal_int4( exp_v, e25_v ), 0); + res_v = (vec_float4)frexpf4(x26_v, &exp_v); + TEST_CHECK("20060907000026AAN", allequal_float4( res_v, r26_v ), 0); + TEST_CHECK("20060907000026AAN", allequal_int4( exp_v, e26_v ), 0); + res_v = (vec_float4)frexpf4(x27_v, &exp_v); + TEST_CHECK("20060907000027AAN", allequal_float4( res_v, r27_v ), 0); + TEST_CHECK("20060907000027AAN", allequal_int4( exp_v, e27_v ), 0); + res_v = (vec_float4)frexpf4(x28_v, &exp_v); + TEST_CHECK("20060907000028AAN", allequal_float4( res_v, r28_v ), 0); + TEST_CHECK("20060907000028AAN", allequal_int4( exp_v, e28_v ), 0); + res_v = (vec_float4)frexpf4(x29_v, &exp_v); + TEST_CHECK("20060907000029AAN", allequal_float4( res_v, r29_v ), 0); + TEST_CHECK("20060907000029AAN", allequal_int4( exp_v, e29_v ), 0); + res_v = (vec_float4)frexpf4(x30_v, &exp_v); + TEST_CHECK("20060907000030AAN", allequal_float4( res_v, r30_v ), 0); + TEST_CHECK("20060907000030AAN", allequal_int4( exp_v, e30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/hypotd2.c b/Extras/simdmathlibrary/spu/tests/hypotd2.c new 
file mode 100644 index 000000000..de1ab17d7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/hypotd2.c @@ -0,0 +1,341 @@ +/* Test hypotd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060831000000AAN","AAN", "hypotd2"); + + // Does not test precesion, which depends on sqrtd2 implementation + // Uses Pythagorean triplets to test result validity + + //-QNaN, Norm + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + double r0 = hide_double( nan("")); + + //-QNaN, -Norm + double x1 = hide_double(-nan("")); + double y1 = hide_double(-21345853556.492); + double r1 = hide_double( nan("")); + + //-Inf, -QNaN + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(make_double(0xFFFFFFFFFFFFFFFFull)); + double r2 = hide_double( nan("")); + + //-Norm, -SNaN + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(make_double(0xFFF7FFFFFFFFFFFFull)); + double r3 = hide_double( nan("")); + + //-Norm, -Denorm + double x4 = hide_double(-4.0e120); + double y4 = hide_double(-3.0e-320); + double r4 = hide_double( 4.0e120); + + //-Norm, -Inf + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-1.0/0.0); + double r5 = hide_double( 1.0/0.0); + + //+Inf, -Inf + double x6 = hide_double( 1.0/0.0); + double y6 = hide_double(-1.0/0.0); + double r6 = hide_double( 1.0/0.0); + + //-Norm, -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + double r7 = hide_double( 168.97345223013); + + //-Unf, -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + double r8 = hide_double( 83532.96153153); + + //-Unf, 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + double r9 = hide_double(0.0); + + //QNaN, 0 + double x10 = hide_double(make_double(0x7FFFFFFFFFFFFFFFull)); + double y10 = hide_double( 0.0); + double r10 = hide_double( nan("")); + + //+Unf, +QNaN + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( nan("")); + double r11 = hide_double( nan("")); + + //+Unf, 
+Norm + double x12 = hide_double( 1e-999); + double y12 = hide_double(0.0031529324); + double r12 = hide_double(0.0031529324); + + //+Norm, +Norm + double x13 = hide_double(55000.0e116); + double y13 = hide_double(480.0e118); + double r13 = hide_double(7.3e120); + + //+Norm, -Denorm + double x14 = hide_double(120.0e120); + double y14 = hide_double(-5.0e-321); + double r14 = hide_double(120.0e120/*1.3e101*/); + + //-Norm, +Demorm + double x15 = hide_double(-0.0000000008); + double y15 = hide_double(1.5e-320); + double r15 = hide_double( 0.8e-9); + + //+Norm, -Norm + double x16 = hide_double( 7.0e-6); + double y16 = hide_double(-24.0e-6); + double r16 = hide_double(25.0e-6); + + //+Norm, +Norm + double x17 = hide_double(0.0055); + double y17 = hide_double(0.0048); + double r17 = hide_double(0.0073); + + //+Denorm, +Norm + double x18 = hide_double(4.5e-320); + double y18 = hide_double(2.8); + double r18 = hide_double(2.8); + + //-Norm, +Norm + double x19 = hide_double(-8000.0); + double y19 = hide_double(39.0e2); + double r19 = hide_double(8900.0); + + //+Norm, +Norm + double x20 = hide_double(6.5e128); + double y20 = hide_double(7.2e128); + double r20 = hide_double(9.7e128); + + //-Norm, -Norm + double x21 = hide_double(-0.0035); + double y21 = hide_double(-1.2e-3); + double r21 = hide_double( 0.0037); + + //+Norm, +Norm + double x22 = hide_double(456548.6027761); + double y22 = hide_double(106165.2293520); + double r22 = hide_double(468729.8610289); + + //+Inf, +Ovf + double x23 = hide_double( 1.0/0.0); + double y23 = hide_double( 1.0e999); + double r23 = hide_double( 1.0/0.0); + + //+Norm, +QNaN + double x24 = hide_double(264.345643345); + double y24 = hide_double(make_double(0x7FFAAAAAAAAAAAAAull)); + double r24 = hide_double( nan("")); + + //+Inf, +QNaN + double x25 = hide_double( 1.0/0.0); + double y25 = hide_double(nan("")); + double r25 = hide_double(nan("")); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_double2 r0_v = 
spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_double2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_double2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_double2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_double2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_double2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_double2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = spu_splats(y7); + vec_double2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_double2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_double2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_double2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_double2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_double2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_double2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_double2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_double2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_double2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_double2 r17_v = spu_splats(r17); + + 
vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_double2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_double2 r19_v = spu_splats(r19); + + vec_double2 x20_v = spu_splats(x20); + vec_double2 y20_v = spu_splats(y20); + vec_double2 r20_v = spu_splats(r20); + + vec_double2 x21_v = spu_splats(x21); + vec_double2 y21_v = spu_splats(y21); + vec_double2 r21_v = spu_splats(r21); + + vec_double2 x22_v = spu_splats(x22); + vec_double2 y22_v = spu_splats(y22); + vec_double2 r22_v = spu_splats(r22); + + vec_double2 x23_v = spu_splats(x23); + vec_double2 y23_v = spu_splats(y23); + vec_double2 r23_v = spu_splats(r23); + + vec_double2 x24_v = spu_splats(x24); + vec_double2 y24_v = spu_splats(y24); + vec_double2 r24_v = spu_splats(r24); + + vec_double2 x25_v = spu_splats(x25); + vec_double2 y25_v = spu_splats(y25); + vec_double2 r25_v = spu_splats(r25); + + vec_double2 res_v; + int tolerance = (int)0x0000000000000001ull; + + TEST_START("hypotd2"); + + res_v = (vec_double2)hypotd2(x0_v, y0_v); + TEST_CHECK("20060831000000AAN", allnan_double2( res_v ), 0); (void)r0_v; + res_v = (vec_double2)hypotd2(x1_v, y1_v); + TEST_CHECK("20060831000001AAN", allnan_double2( res_v ), 0); (void)r1_v; + res_v = (vec_double2)hypotd2(x2_v, y2_v); + TEST_CHECK("20060831000002AAN", allnan_double2( res_v ), 0); (void)r2_v; + res_v = (vec_double2)hypotd2(x3_v, y3_v); + TEST_CHECK("20060831000003AAN", allnan_double2( res_v ), 0); (void)r3_v; + res_v = (vec_double2)hypotd2(x4_v, y4_v); + TEST_CHECK("20060831000004AAN", allequal_ulps_double2( res_v, r4_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x5_v, y5_v); + TEST_CHECK("20060831000005AAN", allequal_ulps_double2( res_v, r5_v, 0 ), 0); + res_v = (vec_double2)hypotd2(x6_v, y6_v); + TEST_CHECK("20060831000006AAN", allequal_ulps_double2( res_v, r6_v, 0 ), 0); + res_v = (vec_double2)hypotd2(x7_v, y7_v); + TEST_CHECK("20060831000007AAN", allequal_ulps_double2( 
res_v, r7_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x8_v, y8_v); + TEST_CHECK("20060831000008AAN", allequal_ulps_double2( res_v, r8_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x9_v, y9_v); + TEST_CHECK("20060831000009AAN", allequal_ulps_double2( res_v, r9_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x10_v, y10_v); + TEST_CHECK("20060831000010AAN", allnan_double2( res_v ), 0); (void)r10_v; + res_v = (vec_double2)hypotd2(x11_v, y11_v); + TEST_CHECK("20060831000011AAN", allnan_double2( res_v ), 0); (void)r11_v; + res_v = (vec_double2)hypotd2(x12_v, y12_v); + TEST_CHECK("20060831000012AAN", allequal_ulps_double2( res_v, r12_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x13_v, y13_v); + TEST_CHECK("20060831000013AAN", allequal_ulps_double2( res_v, r13_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x14_v, y14_v); + TEST_CHECK("20060831000014AAN", allequal_ulps_double2( res_v, r14_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x15_v, y15_v); + TEST_CHECK("20060831000015AAN", allequal_ulps_double2( res_v, r15_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x16_v, y16_v); + TEST_CHECK("20060831000016AAN", allequal_ulps_double2( res_v, r16_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x17_v, y17_v); + TEST_CHECK("20060831000017AAN", allequal_ulps_double2( res_v, r17_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x18_v, y18_v); + TEST_CHECK("20060831000018AAN", allequal_ulps_double2( res_v, r18_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x19_v, y19_v); + TEST_CHECK("20060831000019AAN", allequal_ulps_double2( res_v, r19_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x20_v, y20_v); + TEST_CHECK("20060831000020AAN", allequal_ulps_double2( res_v, r20_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x21_v, y21_v); + TEST_CHECK("20060831000021AAN", allequal_ulps_double2( res_v, r21_v, tolerance ), 0); + res_v = (vec_double2)hypotd2(x22_v, y22_v); + TEST_CHECK("20060831000022AAN", allequal_ulps_double2( res_v, r22_v, 
tolerance ), 0); + res_v = (vec_double2)hypotd2(x23_v, y23_v); + TEST_CHECK("20060831000023AAN", allequal_ulps_double2( res_v, r23_v, 0 ), 0); + res_v = (vec_double2)hypotd2(x24_v, y24_v); + TEST_CHECK("20060831000024AAN", allnan_double2( res_v ), 0); (void)r24_v; + res_v = (vec_double2)hypotd2(x25_v, y25_v); + TEST_CHECK("20060831000025AAN", allnan_double2( res_v ), 0); (void)r25_v; + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/hypotf4.c b/Extras/simdmathlibrary/spu/tests/hypotf4.c new file mode 100644 index 000000000..a71f798b8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/hypotf4.c @@ -0,0 +1,208 @@ +/* Test hypotf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060831000000AAN","AAN", "hypotf4"); + + // Does not test precesion, which depends on sqrtf4 implementation + // Uses Pythagorean triplets to test result validity + + //-Norm, -0 + float x0 = hide_float(-168.97345223013f); + float y0 = hide_float(-0.0); + float r0 = hide_float( 168.97345223013f); + + //-Unf, -Norm + float x1 = hide_float(-1.0e-999); + float y1 = hide_float(-83532.96153153f); + float r1 = hide_float( 83532.96153153f); + + //-Unf, 0 + float x2 = hide_float(-1.0e-999); + float y2 = hide_float( 0.0); + float r2 = hide_float( 0.0); + + //+Unf, +Norm + float x3 = hide_float(1.0e-999); + float y3 = hide_float(0.0031529324f); + float r3 = hide_float(0.0031529324f); + + //+Norm, +Norm + float x4 = hide_float(5.5e12); + float y4 = hide_float(4.8e12); + float r4 = hide_float(7.3e12); + + //+Norm, -Denorm + float x5 = hide_float(12.0e12); + float y5 = hide_float(make_float(0x803AAAAA)); + float r5 = hide_float(12.0e12); + + //-Norm, +Norm + float x6 = hide_float(-0.0000000008); + float y6 = hide_float( 0.0000000015); + float r6 = hide_float( 0.0000000017); + + //+Norm, -Norm + float x7 = hide_float( 7.0e-6); + float y7 = hide_float(-24.0e-6); + float r7 = hide_float( 25.0e-6); + + //+Norm, +Norm + float x8 = hide_float(0.0055f); + float y8 = hide_float(0.0048f); + float r8 = hide_float(0.0073f); + 
+ //+Denorm, +Norm + float x9 = hide_float(make_float(0x007AAAAA)); + float y9 = hide_float(2.8f); + float r9 = hide_float(2.8f); + + //-Norm, +Norm + float x10 = hide_float(-8000.0); + float y10 = hide_float(39.0e2); + float r10 = hide_float(8900.0); + + //+Norm, +Norm + float x11 = hide_float(6.5e16); + float y11 = hide_float(7.2e16); + float r11 = hide_float(9.7e16); + + //-Norm, -Norm + float x12 = hide_float(-0.0035); + float y12 = hide_float(-12e-4); + float r12 = hide_float(3700e-6); + + //+Norm, +Norm + float x13 = hide_float(456548.6027761f); + float y13 = hide_float(106165.2293520f); + float r13 = hide_float(468729.8610289f); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_float4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_float4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_float4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_float4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_float4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_float4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_float4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_float4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_float4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_float4 r9_v = spu_splats(r9); + + vec_float4 x10_v = spu_splats(x10); + vec_float4 y10_v = spu_splats(y10); + vec_float4 r10_v = spu_splats(r10); + + vec_float4 x11_v = spu_splats(x11); + vec_float4 y11_v = spu_splats(y11); + vec_float4 r11_v = spu_splats(r11); + + vec_float4 x12_v = 
spu_splats(x12); + vec_float4 y12_v = spu_splats(y12); + vec_float4 r12_v = spu_splats(r12); + + vec_float4 x13_v = spu_splats(x13); + vec_float4 y13_v = spu_splats(y13); + vec_float4 r13_v = spu_splats(r13); + + vec_float4 res_v; + int tolerance = 0x00000001; + + TEST_START("hypotf4"); + + res_v = (vec_float4)hypotf4(x0_v, y0_v); + TEST_CHECK("20060831000000AAN", allequal_ulps_float4( res_v, r0_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x1_v, y1_v); + TEST_CHECK("20060831000001AAN", allequal_ulps_float4( res_v, r1_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x2_v, y2_v); + TEST_CHECK("20060831000002AAN", allequal_ulps_float4( res_v, r2_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x3_v, y3_v); + TEST_CHECK("20060831000003AAN", allequal_ulps_float4( res_v, r3_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x4_v, y4_v); + TEST_CHECK("20060831000004AAN", allequal_ulps_float4( res_v, r4_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x5_v, y5_v); + TEST_CHECK("20060831000005AAN", allequal_ulps_float4( res_v, r5_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x6_v, y6_v); + TEST_CHECK("20060831000006AAN", allequal_ulps_float4( res_v, r6_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x7_v, y7_v); + TEST_CHECK("20060831000007AAN", allequal_ulps_float4( res_v, r7_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x8_v, y8_v); + TEST_CHECK("20060831000008AAN", allequal_ulps_float4( res_v, r8_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x9_v, y9_v); + TEST_CHECK("20060831000009AAN", allequal_ulps_float4( res_v, r9_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x10_v, y10_v); + TEST_CHECK("20060831000010AAN", allequal_ulps_float4( res_v, r10_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x11_v, y11_v); + TEST_CHECK("20060831000011AAN", allequal_ulps_float4( res_v, r11_v, tolerance ), 0); + res_v = (vec_float4)hypotf4(x12_v, y12_v); + TEST_CHECK("20060831000012AAN", allequal_ulps_float4( res_v, r12_v, tolerance ), 0); + res_v = 
(vec_float4)hypotf4(x13_v, y13_v); + TEST_CHECK("20060831000013AAN", allequal_ulps_float4( res_v, r13_v, tolerance ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/ilogbd2.c b/Extras/simdmathlibrary/spu/tests/ilogbd2.c new file mode 100644 index 000000000..1bc449d09 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/ilogbd2.c @@ -0,0 +1,322 @@ +/* Test ilogbd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 ((int)0x80000001) +#endif +#ifndef FP_ILOGBNAN +#define FP_ILOGBNAN ((int)0x7FFFFFFF) +#endif + +int main() +{ + TEST_SET_START("20060904000000AAN","AAN", "ilogbd2"); + + // -Nan + double x0 = hide_double(-nan("")); + long long r0 = (long long)FP_ILOGBNAN; + + // -Inf + double x1 = hide_double(-1.0/0.0); + long long r1 = (long long)FP_ILOGB0; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + long long r2 = 1023ll; + + // -Norm + double x3 = hide_double(-824842.58421394); + long long r3 = 19ll; + + // -Dmin + double x4 = hide_double(-DBL_MIN); + long long r4 = -1022ll; + + // -Denorm + double x5 = hide_double(-2.40e-310); + long long r5 = -1029ll; + + // -Unf + double x6 = hide_double(-1.0e-999); + long long r6 = (long long)FP_ILOGB0; + + // -0 + double x7 = hide_double(-0.0); + long long r7 = (long long)FP_ILOGB0; + + // 0 + double x8 = hide_double( 0.0); + long long r8 = (long long)FP_ILOGB0; + + // +Unf + double x9 = hide_double( 1.0e-999); + long long r9 = (long long)FP_ILOGB0; + + // +Denorm + double x10 = hide_double( 2.40e-310); + long long r10 = -1029ll; + + // +Dmin + double x11 = hide_double( DBL_MIN); + long long r11 = -1022ll; + + // +Norm + double x12 = hide_double(3.14152634); + long long r12 = 1ll; + + // +Norm + double x13 = hide_double(7.0673903e149); + long long r13 = 497ll; + + // +Norm + double x14 = hide_double(2.4673e304); + long long r14 = 1011ll; + + // +Norm + double x15 = hide_double(7.235672e-25); + long long r15 = -81ll; + + // +Denorm + double x16 = hide_double(9.452854e-312); + long long r16 = -1034ll; + + // +Demorm + double x17 = hide_double(3.045784e-320); + long long r17 = -1062ll; + + // -Norm + double x18 = hide_double(-6.459273e7); + long long r18 = 25ll; + + // -Norm + double x19 = hide_double(-2.493472e-99); + long long r19 = -328ll; + + // -Norm + double x20 = 
hide_double(-1.4824543e128); + long long r20 = 425ll; + + // -Denorm + double x21 = hide_double(-5.53856231e-315); + long long r21 = -1044ll; + + // -Demorm + double x22 = hide_double(-2.5684367e-312); + long long r22 = -1036ll; + + // +Dmax + double x23 = hide_double(DBL_MAX); + long long r23 = 1023ll; + + // +Inf + double x24 = hide_double( 1.0/0.0); + long long r24 = (long long)FP_ILOGB0; + + //+Nan + double x25 = hide_double( nan("")); + long long r25 = (long long)FP_ILOGBNAN; + + // Compound + vec_double2 x26_v = (vec_double2) { -2.561286432e-317, -1.0e-999 }; + vec_llong2 r26_v = (vec_llong2) { -1052ll, FP_ILOGB0 }; + + // Compound + vec_double2 x27_v = (vec_double2) { 345.27533, -8.673e-310 }; + vec_llong2 r27_v = (vec_llong2) { 8ll, -1027ll }; + + // Compound + vec_double2 x28_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_llong2 r28_v = (vec_llong2) { FP_ILOGBNAN, 21ll }; + + // Compound + vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_llong2 r29_v = (vec_llong2) { FP_ILOGB0, FP_ILOGBNAN }; + + // Compound + vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ; + vec_llong2 r30_v = (vec_llong2) { -329ll, FP_ILOGB0 }; + + vec_double2 x0_v = spu_splats(x0); + vec_llong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_llong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_llong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_llong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_llong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_llong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_llong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_llong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_llong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_llong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_llong2 r10_v = 
spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_llong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_llong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_llong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_llong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_llong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_llong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_llong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_llong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_llong2 r19_v = spu_splats(r19); + + vec_double2 x20_v = spu_splats(x20); + vec_llong2 r20_v = spu_splats(r20); + + vec_double2 x21_v = spu_splats(x21); + vec_llong2 r21_v = spu_splats(r21); + + vec_double2 x22_v = spu_splats(x22); + vec_llong2 r22_v = spu_splats(r22); + + vec_double2 x23_v = spu_splats(x23); + vec_llong2 r23_v = spu_splats(r23); + + vec_double2 x24_v = spu_splats(x24); + vec_llong2 r24_v = spu_splats(r24); + + vec_double2 x25_v = spu_splats(x25); + vec_llong2 r25_v = spu_splats(r25); + + vec_llong2 res_v; + + TEST_START("ilogbd2"); + + res_v = (vec_llong2)ilogbd2(x0_v); + TEST_CHECK("20060904000000AAN", allequal_llong2( res_v, r0_v ), 0); + res_v = (vec_llong2)ilogbd2(x1_v); + TEST_CHECK("20060904000001AAN", allequal_llong2( res_v, r1_v ), 0); + res_v = (vec_llong2)ilogbd2(x2_v); + TEST_CHECK("20060904000002AAN", allequal_llong2( res_v, r2_v ), 0); + res_v = (vec_llong2)ilogbd2(x3_v); + TEST_CHECK("20060904000003AAN", allequal_llong2( res_v, r3_v ), 0); + res_v = (vec_llong2)ilogbd2(x4_v); + TEST_CHECK("20060904000004AAN", allequal_llong2( res_v, r4_v ), 0); + res_v = (vec_llong2)ilogbd2(x5_v); + TEST_CHECK("20060904000005AAN", allequal_llong2( res_v, r5_v ), 0); + res_v = (vec_llong2)ilogbd2(x6_v); + TEST_CHECK("20060904000006AAN", allequal_llong2( res_v, r6_v ), 
0); + res_v = (vec_llong2)ilogbd2(x7_v); + TEST_CHECK("20060904000007AAN", allequal_llong2( res_v, r7_v ), 0); + res_v = (vec_llong2)ilogbd2(x8_v); + TEST_CHECK("20060904000008AAN", allequal_llong2( res_v, r8_v ), 0); + res_v = (vec_llong2)ilogbd2(x9_v); + TEST_CHECK("20060904000009AAN", allequal_llong2( res_v, r9_v ), 0); + res_v = (vec_llong2)ilogbd2(x10_v); + TEST_CHECK("20060904000010AAN", allequal_llong2( res_v, r10_v ), 0); + res_v = (vec_llong2)ilogbd2(x11_v); + TEST_CHECK("20060904000011AAN", allequal_llong2( res_v, r11_v ), 0); + res_v = (vec_llong2)ilogbd2(x12_v); + TEST_CHECK("20060904000012AAN", allequal_llong2( res_v, r12_v ), 0); + res_v = (vec_llong2)ilogbd2(x13_v); + TEST_CHECK("20060904000013AAN", allequal_llong2( res_v, r13_v ), 0); + res_v = (vec_llong2)ilogbd2(x14_v); + TEST_CHECK("20060904000014AAN", allequal_llong2( res_v, r14_v ), 0); + res_v = (vec_llong2)ilogbd2(x15_v); + TEST_CHECK("20060904000015AAN", allequal_llong2( res_v, r15_v ), 0); + res_v = (vec_llong2)ilogbd2(x16_v); + TEST_CHECK("20060904000016AAN", allequal_llong2( res_v, r16_v ), 0); + res_v = (vec_llong2)ilogbd2(x17_v); + TEST_CHECK("20060904000017AAN", allequal_llong2( res_v, r17_v ), 0); + res_v = (vec_llong2)ilogbd2(x18_v); + TEST_CHECK("20060904000018AAN", allequal_llong2( res_v, r18_v ), 0); + res_v = (vec_llong2)ilogbd2(x19_v); + TEST_CHECK("20060904000019AAN", allequal_llong2( res_v, r19_v ), 0); + res_v = (vec_llong2)ilogbd2(x20_v); + TEST_CHECK("20060904000020AAN", allequal_llong2( res_v, r20_v ), 0); + res_v = (vec_llong2)ilogbd2(x21_v); + TEST_CHECK("20060904000021AAN", allequal_llong2( res_v, r21_v ), 0); + res_v = (vec_llong2)ilogbd2(x22_v); + TEST_CHECK("20060904000022AAN", allequal_llong2( res_v, r22_v ), 0); + res_v = (vec_llong2)ilogbd2(x23_v); + TEST_CHECK("20060904000023AAN", allequal_llong2( res_v, r23_v ), 0); + res_v = (vec_llong2)ilogbd2(x24_v); + TEST_CHECK("20060904000024AAN", allequal_llong2( res_v, r24_v ), 0); + res_v = (vec_llong2)ilogbd2(x25_v); + 
TEST_CHECK("20060904000025AAN", allequal_llong2( res_v, r25_v ), 0); + res_v = (vec_llong2)ilogbd2(x26_v); + TEST_CHECK("20060904000026AAN", allequal_llong2( res_v, r26_v ), 0); + res_v = (vec_llong2)ilogbd2(x27_v); + TEST_CHECK("20060904000027AAN", allequal_llong2( res_v, r27_v ), 0); + res_v = (vec_llong2)ilogbd2(x28_v); + TEST_CHECK("20060904000028AAN", allequal_llong2( res_v, r28_v ), 0); + res_v = (vec_llong2)ilogbd2(x29_v); + TEST_CHECK("20060904000029AAN", allequal_llong2( res_v, r29_v ), 0); + res_v = (vec_llong2)ilogbd2(x30_v); + TEST_CHECK("20060904000030AAN", allequal_llong2( res_v, r30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/ilogbf4.c b/Extras/simdmathlibrary/spu/tests/ilogbf4.c new file mode 100644 index 000000000..e76928ea5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/ilogbf4.c @@ -0,0 +1,320 @@ +/* Test ilogbf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 ((int)0x80000001) +#endif + +int main() +{ + TEST_SET_START("20060904000000AAN","AAN", "ilogbf4"); + + // -Norm (IEEE-754: -Nan) + float x0 = hide_float(make_float(0xFFC00000)); + int r0 = 128; + + // -Norm (IEEE-754: -Inf) + float x1 = hide_float(-1.0/0.0); + int r1 = 128; + + // -Smax + float x2 = hide_float(-FLT_MAX); + int r2 = 128; + + // -Norm + float x3 = hide_float(-824842.58421394); + int r3 = 19; + + // -Smin + float x4 = hide_float(make_float(0x80800000)); + int r4 = -126; + + // -Denorm + float x5 = hide_float(make_float(0x807AAAAA)); + int r5 = (int)FP_ILOGB0; + + // -Unf + float x6 = hide_float(-1.0e-999); + int r6 = (int)FP_ILOGB0; + + // -0 + float x7 = hide_float(-0.0); + int r7 = (int)FP_ILOGB0; + + // 0 + float x8 = hide_float( 0.0); + int r8 = (int)FP_ILOGB0; + + // +Unf + float x9 = hide_float( 1.0e-999); + int r9 = (int)FP_ILOGB0; + + // +Denorm + float x10 = hide_float(make_float(0x007AAAAA)); + int r10 = (int)FP_ILOGB0; + + // +Smin + float x11 = hide_float(make_float(0x00800000)); + int r11 = -126; + + // +Norm + float x12 = hide_float(3.14152634); + int r12 = 1; + + // +Norm + float x13 = hide_float(7.0673903e37); + int r13 = 125; + + // +Norm + float x14 = hide_float(2.4673e14); + int r14 = 47; + + // +Norm + float x15 = hide_float(7.235672e-25); + int r15 = 
-81; + + // +Norm + float x16 = hide_float(9.452854e17); + int r16 = 59; + + // +Norm + float x17 = hide_float(3.045784e-18); + int r17 = -59; + + // -Norm + float x18 = hide_float(-6.459273e7); + int r18 = 25; + + // -Norm + float x19 = hide_float(-2.493472e-9); + int r19 = -29; + + // -Norm + float x20 = hide_float(-1.4824543e28); + int r20 = 93; + + // -Norm + float x21 = hide_float(-5.53856231e-27); + int r21 = -88; + + // -Norm + float x22 = hide_float(-1.000001); + int r22 = 0; + + // +Smax + float x23 = hide_float(FLT_MAX); + int r23 = 128; + + //+Norm (IEEE-754: +Inf) + float x24 = hide_float( 1.0/0.0); + int r24 = 128; + + //+Norm (IEEE-754: +Nan) + float x25 = hide_float(make_float(0x7FC00000)); + int r25 = 128; + + // Compound + vec_float4 x26_v = (vec_float4) { -2.561286432e10, FLT_MAX, -1.0e-999, 7.235672e-25 }; + vec_int4 r26_v = (vec_int4) { 34, 128, FP_ILOGB0, -81 }; + + // Compound + vec_float4 x27_v = (vec_float4) { 345.27533, 7.0673903e37, -0.0, -2.40e-310 }; + vec_int4 r27_v = (vec_int4) { 8, 125, FP_ILOGB0, FP_ILOGB0 }; + + // Compound + vec_float4 x28_v = (vec_float4) { make_float(0x7FC00000), -824842.58421394, -0.0, -3678342.8765343 }; + vec_int4 r28_v = (vec_int4) { 128, 19, FP_ILOGB0, 21 }; + + // Compound + vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; + vec_int4 r29_v = (vec_int4) { 128, FP_ILOGB0, -88, 128 }; + + // Compound + vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ; + vec_int4 r30_v = (vec_int4) { FP_ILOGB0, -63, -59, 128 }; + + vec_float4 x0_v = spu_splats(x0); + vec_int4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_int4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_int4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_int4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_int4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_int4 r5_v = spu_splats(r5); + 
+ vec_float4 x6_v = spu_splats(x6); + vec_int4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_int4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_int4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_int4 r9_v = spu_splats(r9); + + vec_float4 x10_v = spu_splats(x10); + vec_int4 r10_v = spu_splats(r10); + + vec_float4 x11_v = spu_splats(x11); + vec_int4 r11_v = spu_splats(r11); + + vec_float4 x12_v = spu_splats(x12); + vec_int4 r12_v = spu_splats(r12); + + vec_float4 x13_v = spu_splats(x13); + vec_int4 r13_v = spu_splats(r13); + + vec_float4 x14_v = spu_splats(x14); + vec_int4 r14_v = spu_splats(r14); + + vec_float4 x15_v = spu_splats(x15); + vec_int4 r15_v = spu_splats(r15); + + vec_float4 x16_v = spu_splats(x16); + vec_int4 r16_v = spu_splats(r16); + + vec_float4 x17_v = spu_splats(x17); + vec_int4 r17_v = spu_splats(r17); + + vec_float4 x18_v = spu_splats(x18); + vec_int4 r18_v = spu_splats(r18); + + vec_float4 x19_v = spu_splats(x19); + vec_int4 r19_v = spu_splats(r19); + + vec_float4 x20_v = spu_splats(x20); + vec_int4 r20_v = spu_splats(r20); + + vec_float4 x21_v = spu_splats(x21); + vec_int4 r21_v = spu_splats(r21); + + vec_float4 x22_v = spu_splats(x22); + vec_int4 r22_v = spu_splats(r22); + + vec_float4 x23_v = spu_splats(x23); + vec_int4 r23_v = spu_splats(r23); + + vec_float4 x24_v = spu_splats(x24); + vec_int4 r24_v = spu_splats(r24); + + vec_float4 x25_v = spu_splats(x25); + vec_int4 r25_v = spu_splats(r25); + + vec_int4 res_v; + + TEST_START("ilogbf4"); + + res_v = (vec_int4)ilogbf4(x0_v); + TEST_CHECK("20060904000000AAN", allequal_int4( res_v, r0_v ), 0); + res_v = (vec_int4)ilogbf4(x1_v); + TEST_CHECK("20060904000001AAN", allequal_int4( res_v, r1_v ), 0); + res_v = (vec_int4)ilogbf4(x2_v); + TEST_CHECK("20060904000002AAN", allequal_int4( res_v, r2_v ), 0); + res_v = (vec_int4)ilogbf4(x3_v); + TEST_CHECK("20060904000003AAN", allequal_int4( res_v, r3_v ), 0); + res_v = (vec_int4)ilogbf4(x4_v); + 
TEST_CHECK("20060904000004AAN", allequal_int4( res_v, r4_v ), 0); + res_v = (vec_int4)ilogbf4(x5_v); + TEST_CHECK("20060904000005AAN", allequal_int4( res_v, r5_v ), 0); + res_v = (vec_int4)ilogbf4(x6_v); + TEST_CHECK("20060904000006AAN", allequal_int4( res_v, r6_v ), 0); + res_v = (vec_int4)ilogbf4(x7_v); + TEST_CHECK("20060904000007AAN", allequal_int4( res_v, r7_v ), 0); + res_v = (vec_int4)ilogbf4(x8_v); + TEST_CHECK("20060904000008AAN", allequal_int4( res_v, r8_v ), 0); + res_v = (vec_int4)ilogbf4(x9_v); + TEST_CHECK("20060904000009AAN", allequal_int4( res_v, r9_v ), 0); + res_v = (vec_int4)ilogbf4(x10_v); + TEST_CHECK("20060904000010AAN", allequal_int4( res_v, r10_v ), 0); + res_v = (vec_int4)ilogbf4(x11_v); + TEST_CHECK("20060904000011AAN", allequal_int4( res_v, r11_v ), 0); + res_v = (vec_int4)ilogbf4(x12_v); + TEST_CHECK("20060904000012AAN", allequal_int4( res_v, r12_v ), 0); + res_v = (vec_int4)ilogbf4(x13_v); + TEST_CHECK("20060904000013AAN", allequal_int4( res_v, r13_v ), 0); + res_v = (vec_int4)ilogbf4(x14_v); + TEST_CHECK("20060904000014AAN", allequal_int4( res_v, r14_v ), 0); + res_v = (vec_int4)ilogbf4(x15_v); + TEST_CHECK("20060904000015AAN", allequal_int4( res_v, r15_v ), 0); + res_v = (vec_int4)ilogbf4(x16_v); + TEST_CHECK("20060904000016AAN", allequal_int4( res_v, r16_v ), 0); + res_v = (vec_int4)ilogbf4(x17_v); + TEST_CHECK("20060904000017AAN", allequal_int4( res_v, r17_v ), 0); + res_v = (vec_int4)ilogbf4(x18_v); + TEST_CHECK("20060904000018AAN", allequal_int4( res_v, r18_v ), 0); + res_v = (vec_int4)ilogbf4(x19_v); + TEST_CHECK("20060904000019AAN", allequal_int4( res_v, r19_v ), 0); + res_v = (vec_int4)ilogbf4(x20_v); + TEST_CHECK("20060904000020AAN", allequal_int4( res_v, r20_v ), 0); + res_v = (vec_int4)ilogbf4(x21_v); + TEST_CHECK("20060904000021AAN", allequal_int4( res_v, r21_v ), 0); + res_v = (vec_int4)ilogbf4(x22_v); + TEST_CHECK("20060904000022AAN", allequal_int4( res_v, r22_v ), 0); + res_v = (vec_int4)ilogbf4(x23_v); + 
TEST_CHECK("20060904000023AAN", allequal_int4( res_v, r23_v ), 0); + res_v = (vec_int4)ilogbf4(x24_v); + TEST_CHECK("20060904000024AAN", allequal_int4( res_v, r24_v ), 0); + res_v = (vec_int4)ilogbf4(x25_v); + TEST_CHECK("20060904000025AAN", allequal_int4( res_v, r25_v ), 0); + res_v = (vec_int4)ilogbf4(x26_v); + TEST_CHECK("20060904000026AAN", allequal_int4( res_v, r26_v ), 0); + res_v = (vec_int4)ilogbf4(x27_v); + TEST_CHECK("20060904000027AAN", allequal_int4( res_v, r27_v ), 0); + res_v = (vec_int4)ilogbf4(x28_v); + TEST_CHECK("20060904000028AAN", allequal_int4( res_v, r28_v ), 0); + res_v = (vec_int4)ilogbf4(x29_v); + TEST_CHECK("20060904000029AAN", allequal_int4( res_v, r29_v ), 0); + res_v = (vec_int4)ilogbf4(x30_v); + TEST_CHECK("20060904000030AAN", allequal_int4( res_v, r30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/irintf4.c b/Extras/simdmathlibrary/spu/tests/irintf4.c new file mode 100644 index 000000000..afd1cbcc6 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/irintf4.c @@ -0,0 +1,96 @@ +/* Test irintf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+#define DEFINE_DATA(val,a,b)\
+ float val = hide_float(a); \
+ signed int val ## _out =b;\
+ vec_float4 val ## _v = spu_splats(val);\
+ vec_int4 val ## _out_v = spu_splats(val ## _out);
+
+#define DEFINE_DATA_UNSIGNED(val,a,b)\
+ unsigned int val ## _in = a;\
+ float val = make_float(val ## _in);\
+ signed int val ## _out = b;\
+ vec_float4 val ## _v = spu_splats(val);\
+ vec_int4 val ## _out_v = spu_splats(val ## _out);
+
+#define DO_TEST(var,id) \
+ res_v = irintf4(var ## _v); \
+ TEST_CHECK(#id, allequal_int4( res_v, var ## _out_v ), 0); /* #id stringizes the test id */
+
+int main()
+{
+ vec_int4 res_v;
+
+ TEST_SET_START("921537538600","RNT", "irintf4");
+
+ /*
+   Define original values and the results
+ */
+ //s=0
+ DEFINE_DATA(x1, 1.0, 1)
+ DEFINE_DATA(x2, -1.0,-1)
+
+ //s=-1
+ DEFINE_DATA(x3, 0.5, 0)
+ DEFINE_DATA(x4, -0.5, 0)
+ //s=-2
+ DEFINE_DATA(x5, 0.25, 0)
+ //s=-3
+ DEFINE_DATA(x6, 0.125, 0)
+
+ //s=0, e=27, f=0 -> 134217728
+ DEFINE_DATA_UNSIGNED(x7, 0x4d000000,134217728)
+ //s=0, e=-126, f=0 --> 0
+ DEFINE_DATA_UNSIGNED(x8, 0x800000,0)
+
+ /* TEST */
+ TEST_START("irintf4");
+
+ DO_TEST(x1,921537538601RNT)
+ DO_TEST(x2,921537538602RNT)
+ DO_TEST(x3,921537538603RNT)
+ DO_TEST(x4,921537538604RNT)
+ DO_TEST(x5,921537538605RNT)
+ DO_TEST(x6,921537538606RNT)
+ DO_TEST(x7,921537538607RNT)
+ DO_TEST(x8,921537538608RNT)
+
+ TEST_SET_DONE();
+
+ TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/iroundf4.c b/Extras/simdmathlibrary/spu/tests/iroundf4.c
new file mode 100644
index 000000000..a6d3fe199
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/iroundf4.c
@@ -0,0 +1,96 @@
+/* Test iroundf4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+#define DEFINE_DATA(val,a,b)\
+ float val = hide_float(a); \
+ signed int val ## _out =b;\
+ vec_float4 val ## _v = spu_splats(val);\
+ vec_int4 val ## _out_v = spu_splats(val ## _out);
+
+#define DEFINE_DATA_UNSIGNED(val,a,b)\
+ unsigned int val ## _in = a;\
+ float val = make_float(val ## _in);\
+ signed int val ## _out = b;\
+ vec_float4 val ## _v = spu_splats(val);\
+ vec_int4 val ## _out_v = spu_splats(val ## _out);
+
+#define DO_TEST(var,id) \
+ res_v = iroundf4(var ## _v); \
+ TEST_CHECK(#id, allequal_int4( res_v, var ## _out_v ), 0); /* #id stringizes the test id */
+
+
+int main()
+{
+ vec_int4 res_v;
+
+ TEST_SET_START("592642590100","RUD", "iroundf4");
+
+ /*
+   Define original values and the results
+ */
+ //s=0
+ DEFINE_DATA(x1, 1.0, 1)
+ DEFINE_DATA(x2, -1.0,-1)
+ //s=-1
+ DEFINE_DATA(x3, 0.5, 1)
+ DEFINE_DATA(x4, -0.5, -1)
+ //s=-2
+ DEFINE_DATA(x5, 0.25, 0)
+ //s=-3
+ DEFINE_DATA(x6, 0.125, 0)
+ //s=0, e=27, f=0 -> 134217728
+ DEFINE_DATA_UNSIGNED(x7, 0x4d000000,134217728)
+ //s=0, e=-126, f=0 --> 0
+ DEFINE_DATA_UNSIGNED(x8, 0x800000,0)
+
+ /* TEST */
+ TEST_START("iroundf4");
+
+ DO_TEST(x1,592642590101RUD)
+ DO_TEST(x2,592642590102RUD)
+ DO_TEST(x3,592642590103RUD)
+ DO_TEST(x4,592642590104RUD)
+ DO_TEST(x5,592642590105RUD)
+ DO_TEST(x6,592642590106RUD)
+ DO_TEST(x7,592642590107RUD)
+ DO_TEST(x8,592642590108RUD)
+
+ TEST_SET_DONE();
+
+
+ TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/is0denormd2.c b/Extras/simdmathlibrary/spu/tests/is0denormd2.c
new file mode 100644
index 000000000..161bce6ec
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/is0denormd2.c
@@ -0,0 +1,225 @@
+/* Test is0denormd2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+int main()
+{
+ TEST_SET_START("20060830000000AAN","AAN", "is0denormd2");
+
+ // -Nan
+ double x0 = hide_double(-nan(""));
+ unsigned long long r0 = 0x0000000000000000ull;
+
+ // -Inf
+ double x1 = hide_double(-1.0/0.0);
+ unsigned long long r1 = 0x0000000000000000ull;
+
+ // -Dmax
+ double x2 = hide_double(-DBL_MAX);
+ unsigned long long r2 = 0x0000000000000000ull;
+
+ // -Norm
+ double x3 = hide_double(-824842.58421394);
+ unsigned long long r3 = 0x0000000000000000ull;
+
+ // -Dmin
+ double x4 = hide_double(-DBL_MIN);
+ unsigned long long r4 = 0x0000000000000000ull;
+
+ // -Denorm
+ double x5 = hide_double(-2.40e-310);
+ unsigned long long r5 = 0xffffffffffffffffull;
+
+ // -Unf
+ double x6 = hide_double(-1.0e-999);
+ unsigned long long r6 = 0xffffffffffffffffull;
+
+ // -0
+ double x7 = hide_double(-0.0);
+ unsigned long long r7 = 0xffffffffffffffffull;
+
+ // 0
+ double x8 = hide_double( 0.0);
+ unsigned long long r8 = 0xffffffffffffffffull;
+
+ // +Unf
+ double x9 = hide_double( 1.0e-999);
+ unsigned long long r9 = 0xffffffffffffffffull;
+
+ // +Denorm
+ double x10 = hide_double( 2.40e-310);
+ unsigned long long r10 = 0xffffffffffffffffull;
+
+ // +Dmin
+ double x11 = hide_double( DBL_MIN);
+ unsigned long long r11 = 0x0000000000000000ull;
+
+ // +Norm
+ double x12 = hide_double(3.14152634);
+ unsigned long long r12 = 0x0000000000000000ull;
+
+ // +Dmax
+ double x13 = hide_double(DBL_MAX);
+ unsigned long long r13 = 0x0000000000000000ull;
+
+ // +Inf
+ double x14 = hide_double( 1.0/0.0);
+ unsigned long long r14 = 0x0000000000000000ull;
+
+ //+Nan
+ double x15 = hide_double( nan(""));
+ unsigned long long r15 = 0x0000000000000000ull;
+
+ // Compound
+ vec_double2 x16_v = (vec_double2) {make_double(0x000AAAAAAAAAAAAAull), -1.0e-999 };
+ vec_ullong2 r16_v = (vec_ullong2) {0xffffffffffffffffull, 0xffffffffffffffffull};
+
+ // Compound
+ vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 };
+ vec_ullong2 r17_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull};
+
+ // Compound
+ vec_double2 x18_v = (vec_double2) { nan(""), -3678342.8765343 };
+ vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull};
+
+ // Compound
+ vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") };
+ vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull};
+
+ // Compound
+ vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ;
+ vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull};
+
+ vec_double2 x0_v = spu_splats(x0);
+ vec_ullong2 r0_v = spu_splats(r0);
+
+ vec_double2 x1_v = spu_splats(x1);
+ vec_ullong2 r1_v = spu_splats(r1);
+
+ vec_double2 x2_v = spu_splats(x2);
+ vec_ullong2 r2_v = spu_splats(r2);
+
+ vec_double2 x3_v = spu_splats(x3);
+ vec_ullong2 r3_v = spu_splats(r3);
+
+ vec_double2 x4_v = spu_splats(x4);
+ vec_ullong2 r4_v = spu_splats(r4);
+
+ vec_double2 x5_v = spu_splats(x5);
+ vec_ullong2 r5_v = spu_splats(r5);
+
+ vec_double2 x6_v = spu_splats(x6);
+ vec_ullong2 r6_v = spu_splats(r6);
+
+ vec_double2 x7_v = spu_splats(x7);
+ vec_ullong2 r7_v = spu_splats(r7);
+
+ vec_double2 x8_v = spu_splats(x8);
+ vec_ullong2 r8_v = spu_splats(r8);
+
+ vec_double2 x9_v = spu_splats(x9);
+ vec_ullong2 r9_v = spu_splats(r9);
+
+ vec_double2 x10_v = spu_splats(x10);
+ vec_ullong2 r10_v = spu_splats(r10);
+
+ vec_double2 x11_v = spu_splats(x11);
+ vec_ullong2 r11_v = spu_splats(r11);
+
+ vec_double2 x12_v = spu_splats(x12);
+ vec_ullong2 r12_v = spu_splats(r12);
+
+ vec_double2 x13_v = spu_splats(x13);
+ vec_ullong2 r13_v = spu_splats(r13);
+
+ vec_double2 x14_v = spu_splats(x14);
+ vec_ullong2 r14_v = spu_splats(r14);
+
+ vec_double2 x15_v = spu_splats(x15);
+ vec_ullong2 r15_v = spu_splats(r15);
+
+ vec_ullong2 res_v;
+
+ TEST_START("is0denormd2");
+
+ res_v = (vec_ullong2)is0denormd2(x0_v);
+ TEST_CHECK("20060830000000AAN", allequal_ullong2( res_v, r0_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x1_v);
+ TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x2_v);
+ TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, r2_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x3_v);
+ TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x4_v);
+ TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x5_v);
+ TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x6_v);
+ TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x7_v);
+ TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x8_v);
+ TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x9_v);
+ TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x10_v);
+ TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x11_v);
+ TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x12_v);
+ TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x13_v);
+ TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x14_v);
+ TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x15_v);
+ TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x16_v);
+ TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x17_v);
+ TEST_CHECK("20060830000017AAN", allequal_ullong2( res_v, r17_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x18_v);
+ TEST_CHECK("20060830000018AAN", allequal_ullong2( res_v, r18_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x19_v);
+ TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0);
+ res_v = (vec_ullong2)is0denormd2(x20_v);
+ TEST_CHECK("20060830000020AAN", allequal_ullong2( res_v, r20_v ), 0);
+
+ TEST_SET_DONE();
+
+ TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/is0denormf4.c b/Extras/simdmathlibrary/spu/tests/is0denormf4.c
new file mode 100644
index 000000000..ddf98b12d
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/is0denormf4.c
@@ -0,0 +1,200 @@
+/* Test is0denormf4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "simdmath.h"
+#include "common-test.h"
+#include "testutils.h"
+
+int main()
+{
+ TEST_SET_START("20060830000000AAN","AAN", "is0denormf4");
+
+ // -Nan
+ float x0 = hide_float(-nan(""));
+ unsigned int r0 = 0x00000000;
+
+ // -Inf
+ float x1 = hide_float(-1.0/0.0);
+ unsigned int r1 = 0x00000000;
+
+ // -Smax
+ float x2 = hide_float(make_float(0xffffffff));
+ unsigned int r2 = 0x00000000;
+
+ // -Norm
+ float x3 = hide_float(-824842.58421394f);
+ unsigned int r3 = 0x00000000;
+
+ // -Smin
+ float x4 = hide_float(make_float(0x80800000));
+ unsigned int r4 = 0x00000000;
+
+ // -Denorm
+ float x5 = hide_float(make_float(0x803aaaaa));
+ unsigned int r5 = 0xffffffff;
+
+ // -Unf
+ float x6 = hide_float(-1.0e-999);
+ unsigned int r6 = 0xffffffff;
+
+ // -0
+ float x7 = hide_float(-0.0f);
+ unsigned int r7 = 0xffffffff;
+
+ // 0
+ float x8 = hide_float( 0.0f);
+ unsigned int r8 = 0xffffffff;
+
+ // +Unf
+ float x9 = hide_float( 1.0e-999);
+ unsigned int r9 = 0xffffffff;
+
+ // +Denorm
+ float x10 = hide_float(make_float(0x003aaaaa));
+ unsigned int r10 = 0xffffffff;
+
+ // +Smin
+ float x11 = hide_float(make_float(0x00800000));
+ unsigned int r11 = 0x00000000;
+
+ // +Norm
+ float x12 = hide_float(3.14152634f);
+ unsigned int r12 = 0x00000000;
+
+ // +Smax
+ float x13 = hide_float(make_float(0x7fffffff));
+ unsigned int r13 = 0x00000000;
+
+ // +Inf
+ float x14 = hide_float( 1.0/0.0);
+ unsigned int r14 = 0x00000000;
+
+ //+Nan
+ float x15 = hide_float( nan(""));
+ unsigned int r15 = 0x00000000;
+
+ // Compound
+ vec_float4 x16_v = (vec_float4) {make_float(0x003AAAAA), -1.0e-999, 345.27533, make_float(0x803AAAAA)};
+ vec_uint4 r16_v = (vec_uint4) {0xffffffff, 0xffffffff, 0x00000000, 0xffffffff};
+
+ vec_float4 x0_v = spu_splats(x0);
+ vec_uint4 r0_v = spu_splats(r0);
+
+ vec_float4 x1_v = spu_splats(x1);
+ vec_uint4 r1_v = spu_splats(r1);
+
+ vec_float4 x2_v = spu_splats(x2);
+ vec_uint4 r2_v = spu_splats(r2);
+
+ vec_float4 x3_v = spu_splats(x3);
+ vec_uint4 r3_v = spu_splats(r3);
+
+ vec_float4 x4_v = spu_splats(x4);
+ vec_uint4 r4_v = spu_splats(r4);
+
+ vec_float4 x5_v = spu_splats(x5);
+ vec_uint4 r5_v = spu_splats(r5);
+
+ vec_float4 x6_v = spu_splats(x6);
+ vec_uint4 r6_v = spu_splats(r6);
+
+ vec_float4 x7_v = spu_splats(x7);
+ vec_uint4 r7_v = spu_splats(r7);
+
+ vec_float4 x8_v = spu_splats(x8);
+ vec_uint4 r8_v = spu_splats(r8);
+
+ vec_float4 x9_v = spu_splats(x9);
+ vec_uint4 r9_v = spu_splats(r9);
+
+ vec_float4 x10_v = spu_splats(x10);
+ vec_uint4 r10_v = spu_splats(r10);
+
+ vec_float4 x11_v = spu_splats(x11);
+ vec_uint4 r11_v = spu_splats(r11);
+
+ vec_float4 x12_v = spu_splats(x12);
+ vec_uint4 r12_v = spu_splats(r12);
+
+ vec_float4 x13_v = spu_splats(x13);
+ vec_uint4 r13_v = spu_splats(r13);
+
+ vec_float4 x14_v = spu_splats(x14);
+ vec_uint4 r14_v = spu_splats(r14);
+
+ vec_float4 x15_v = spu_splats(x15);
+ vec_uint4 r15_v = spu_splats(r15);
+
+ vec_uint4 res_v;
+
+ TEST_START("is0denormf4");
+
+ res_v = (vec_uint4)is0denormf4(x0_v);
+ TEST_CHECK("20060830000000AAN", allequal_uint4( res_v, r0_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x1_v);
+ TEST_CHECK("20060830000001AAN", allequal_uint4( res_v, r1_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x2_v);
+ TEST_CHECK("20060830000002AAN", allequal_uint4( res_v, r2_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x3_v);
+ TEST_CHECK("20060830000003AAN", allequal_uint4( res_v, r3_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x4_v);
+ TEST_CHECK("20060830000004AAN", allequal_uint4( res_v, r4_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x5_v);
+ TEST_CHECK("20060830000005AAN", allequal_uint4( res_v, r5_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x6_v);
+ TEST_CHECK("20060830000006AAN", allequal_uint4( res_v, r6_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x7_v);
+ TEST_CHECK("20060830000007AAN", allequal_uint4( res_v, r7_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x8_v);
+ TEST_CHECK("20060830000008AAN", allequal_uint4( res_v, r8_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x9_v);
+ TEST_CHECK("20060830000009AAN", allequal_uint4( res_v, r9_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x10_v);
+ TEST_CHECK("20060830000010AAN", allequal_uint4( res_v, r10_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x11_v);
+ TEST_CHECK("20060830000011AAN", allequal_uint4( res_v, r11_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x12_v);
+ TEST_CHECK("20060830000012AAN", allequal_uint4( res_v, r12_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x13_v);
+ TEST_CHECK("20060830000013AAN", allequal_uint4( res_v, r13_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x14_v);
+ TEST_CHECK("20060830000014AAN", allequal_uint4( res_v, r14_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x15_v);
+ TEST_CHECK("20060830000015AAN", allequal_uint4( res_v, r15_v ), 0);
+ res_v = (vec_uint4)is0denormf4(x16_v);
+ TEST_CHECK("20060830000016AAN", allequal_uint4( res_v, r16_v ), 0);
+
+ TEST_SET_DONE();
+
+ TEST_EXIT();
+}
diff --git a/Extras/simdmathlibrary/spu/tests/isequald2.c b/Extras/simdmathlibrary/spu/tests/isequald2.c
new file mode 100644
index 000000000..3980e6955
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/isequald2.c
@@ -0,0 +1,271 @@
+/* Test isequald2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "isequald2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0x0000000000000000ull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0x0000000000000000ull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0x0000000000000000ull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0xffffffffffffffffull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0x0000000000000000ull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0x0000000000000000ull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0xffffffffffffffffull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0xffffffffffffffffull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0xffffffffffffffffull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); + 
double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0x0000000000000000ull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0x0000000000000000ull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0xffffffffffffffffull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0x0000000000000000ull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0x0000000000000000ull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0x0000000000000000ull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0x0000000000000000ull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("isequald2"); + + res_v = (vec_ullong2)isequald2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isequald2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isequald2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isequald2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v 
= (vec_ullong2)isequald2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isequald2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isequald2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isequald2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isequald2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isequald2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isequald2(x10_v, y10_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isequald2(x11_v, y11_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isequald2(x12_v, y12_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isequald2(x13_v, y13_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isequald2(x14_v, y14_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isequald2(x15_v, y15_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isequald2(x16_v, y16_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isequald2(x17_v, y17_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isequald2(x18_v, y18_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)isequald2(x19_v, y19_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isequalf4.c 
b/Extras/simdmathlibrary/spu/tests/isequalf4.c new file mode 100644 index 000000000..702687813 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isequalf4.c @@ -0,0 +1,150 @@ +/* Test isequalf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
 */


/* Test isequalf4 for SPU.

   Exercises the vector-float equality predicate isequalf4() on
   hand-picked operand pairs.  Each scalar pair is splatted into all
   four lanes of a vec_float4; the expected per-lane result is an
   all-ones 32-bit mask (equal) or all-zeros mask (not equal),
   verified with allequal_uint4().

   The "Smax"/"Smin" comments reflect SPU single-precision semantics:
   1.0/0.0 and 1e-999 have no Inf/denormal representation in SPU
   floats, so the tests treat them as the largest/smallest magnitudes
   (see r1/r9 below, which expect ordinary not-equal results).

   NOTE(review): the targets of the bare #include lines were lost when
   this patch was re-encoded; the set below is reconstructed from the
   sibling single-precision tests -- confirm against upstream.  */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "simdmath.h"
#include "common-test.h"
#include "testutils.h"

int main()
{
  TEST_SET_START("20060817000000AAN","AAN", "isequalf4");

  // -0 and +0 must compare equal
  float x0 = hide_float(-0.0f);
  float y0 = hide_float( 0.0f);
  unsigned int r0 = 0xffffffff;

  float x1 = hide_float( 1.0/0.0);  //+Smax
  float y1 = hide_float(-1.0/0.0);  //-Smax
  unsigned int r1 = 0x00000000;

  // tiny vs. large magnitude, both negative
  float x2 = hide_float(-0.0000000013152f);
  float y2 = hide_float(-234245.85323441f);
  unsigned int r2 = 0x00000000;

  // identical negative normals: equal
  float x3 = hide_float(-168.97345223013f);
  float y3 = hide_float(-168.97345223013f);
  unsigned int r3 = 0xffffffff;

  float x4 = hide_float(-83532.96153153f);
  float y4 = hide_float(-1e-999);  //-Smin
  unsigned int r4 = 0x00000000;

  // opposite signs
  float x5 = hide_float(-321.01234567f);
  float y5 = hide_float(876543.12345f);
  unsigned int r5 = 0x00000000;

  float x6 = hide_float( 1e-999);  // Smin
  float y6 = hide_float(0.0031529324f);
  unsigned int r6 = 0x00000000;

  // identical positive normals: equal
  float x7 = hide_float(5172.2845321f);
  float y7 = hide_float(5172.2845321f);
  unsigned int r7 = 0xffffffff;

  float x8 = hide_float(264.345643345f);
  float y8 = hide_float(2353705.31415f);
  unsigned int r8 = 0x00000000;

  float x9 = hide_float( 1.0/0.0);  // Smax
  float y9 = hide_float(9.43574552184f);
  unsigned int r9 = 0x00000000;

  // Splat each scalar case across all four vector lanes.
  vec_float4 x0_v = spu_splats(x0);
  vec_float4 y0_v = spu_splats(y0);
  vec_uint4 r0_v = spu_splats(r0);

  vec_float4 x1_v = spu_splats(x1);
  vec_float4 y1_v = spu_splats(y1);
  vec_uint4 r1_v = spu_splats(r1);

  vec_float4 x2_v = spu_splats(x2);
  vec_float4 y2_v = spu_splats(y2);
  vec_uint4 r2_v = spu_splats(r2);

  vec_float4 x3_v = spu_splats(x3);
  vec_float4 y3_v = spu_splats(y3);
  vec_uint4 r3_v = spu_splats(r3);

  vec_float4 x4_v = spu_splats(x4);
  vec_float4 y4_v = spu_splats(y4);
  vec_uint4 r4_v = spu_splats(r4);

  vec_float4 x5_v = spu_splats(x5);
  vec_float4 y5_v = spu_splats(y5);
  vec_uint4 r5_v = spu_splats(r5);

  vec_float4 x6_v = spu_splats(x6);
  vec_float4 y6_v = spu_splats(y6);
  vec_uint4 r6_v = spu_splats(r6);

  vec_float4 x7_v = spu_splats(x7);
  vec_float4 y7_v = spu_splats(y7);
  vec_uint4 r7_v = spu_splats(r7);

  vec_float4 x8_v = spu_splats(x8);
  vec_float4 y8_v = spu_splats(y8);
  vec_uint4 r8_v = spu_splats(r8);

  vec_float4 x9_v = spu_splats(x9);
  vec_float4 y9_v = spu_splats(y9);
  vec_uint4 r9_v = spu_splats(r9);

  vec_uint4 res_v;

  TEST_START("isequalf4");

  res_v = (vec_uint4)isequalf4(x0_v, y0_v);
  TEST_CHECK("20060817000000AAN", allequal_uint4( res_v, r0_v ), 0);
  res_v = (vec_uint4)isequalf4(x1_v, y1_v);
  TEST_CHECK("20060817000001AAN", allequal_uint4( res_v, r1_v ), 0);
  res_v = (vec_uint4)isequalf4(x2_v, y2_v);
  TEST_CHECK("20060817000002AAN", allequal_uint4( res_v, r2_v ), 0);
  res_v = (vec_uint4)isequalf4(x3_v, y3_v);
  TEST_CHECK("20060817000003AAN", allequal_uint4( res_v, r3_v ), 0);
  res_v = (vec_uint4)isequalf4(x4_v, y4_v);
  TEST_CHECK("20060817000004AAN", allequal_uint4( res_v, r4_v ), 0);
  res_v = (vec_uint4)isequalf4(x5_v, y5_v);
  TEST_CHECK("20060817000005AAN", allequal_uint4( res_v, r5_v ), 0);
  res_v = (vec_uint4)isequalf4(x6_v, y6_v);
  TEST_CHECK("20060817000006AAN", allequal_uint4( res_v, r6_v ), 0);
  res_v = (vec_uint4)isequalf4(x7_v, y7_v);
  TEST_CHECK("20060817000007AAN", allequal_uint4( res_v, r7_v ), 0);
  res_v = (vec_uint4)isequalf4(x8_v, y8_v);
  TEST_CHECK("20060817000008AAN", allequal_uint4( res_v, r8_v ), 0);
  res_v = (vec_uint4)isequalf4(x9_v, y9_v);
  TEST_CHECK("20060817000009AAN", allequal_uint4( res_v, r9_v ), 0);

  TEST_SET_DONE();

  TEST_EXIT();
}
diff --git a/Extras/simdmathlibrary/spu/tests/isfinited2.c b/Extras/simdmathlibrary/spu/tests/isfinited2.c
new file mode 100644
index 000000000..7d045535a
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/isfinited2.c
@@ -0,0 +1,225 @@
+/* Test isfinited2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment
Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
 */


/* Test isfinited2 for SPU.

   Exercises the vector-double finiteness predicate isfinited2() over
   the full double-precision value spectrum: NaN, +/-Inf, +/-DBL_MAX,
   normals, +/-DBL_MIN, denormals, underflowed literals (1.0e-999,
   which underflows to +/-0), and signed zeros.  Scalar cases are
   splatted into both lanes; compound cases place different classes in
   each lane.  Expected per-lane result: all-ones mask (finite) or
   all-zeros mask (not finite), verified with allequal_ullong2().

   NOTE(review): the targets of the bare #include lines were lost when
   this patch was re-encoded; the set below is reconstructed from the
   sibling double-precision tests -- confirm against upstream.  */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include "simdmath.h"
#include "common-test.h"
#include "testutils.h"

int main()
{
  TEST_SET_START("20060830000000AAN","AAN", "isfinited2");

  // -Nan
  double x0 = hide_double(-nan(""));
  unsigned long long r0 = 0x0000000000000000ull;

  // -Inf
  double x1 = hide_double(-1.0/0.0);
  unsigned long long r1 = 0x0000000000000000ull;

  // -Dmax
  double x2 = hide_double(-DBL_MAX);
  unsigned long long r2 = 0xffffffffffffffffull;

  // -Norm
  double x3 = hide_double(-824842.58421394);
  unsigned long long r3 = 0xffffffffffffffffull;

  // -Dmin
  double x4 = hide_double(-DBL_MIN);
  unsigned long long r4 = 0xffffffffffffffffull;

  // -Denorm
  double x5 = hide_double(-2.40e-310);
  unsigned long long r5 = 0xffffffffffffffffull;

  // -Unf (underflows to -0, which is finite)
  double x6 = hide_double(-1.0e-999);
  unsigned long long r6 = 0xffffffffffffffffull;

  // -0
  double x7 = hide_double(-0.0);
  unsigned long long r7 = 0xffffffffffffffffull;

  // 0
  double x8 = hide_double( 0.0);
  unsigned long long r8 = 0xffffffffffffffffull;

  // +Unf (underflows to +0, which is finite)
  double x9 = hide_double( 1.0e-999);
  unsigned long long r9 = 0xffffffffffffffffull;

  // +Denorm
  double x10 = hide_double( 2.40e-310);
  unsigned long long r10 = 0xffffffffffffffffull;

  // +Dmin
  double x11 = hide_double( DBL_MIN);
  unsigned long long r11 = 0xffffffffffffffffull;

  // +Norm
  double x12 = hide_double(3.14152634);
  unsigned long long r12 = 0xffffffffffffffffull;

  // +Dmax
  double x13 = hide_double(DBL_MAX);
  unsigned long long r13 = 0xffffffffffffffffull;

  // +Inf
  double x14 = hide_double( 1.0/0.0);
  unsigned long long r14 = 0x0000000000000000ull;

  //+Nan
  double x15 = hide_double( nan(""));
  unsigned long long r15 = 0x0000000000000000ull;

  // Compound: denorm bit pattern / underflowed literal -- both finite
  vec_double2 x16_v = (vec_double2) {make_double(0x000AAAAAAAAAAAAAull), -1.0e-999 };
  vec_ullong2 r16_v = (vec_ullong2) {0xffffffffffffffffull, 0xffffffffffffffffull};

  // Compound: normal / denorm -- both finite
  vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 };
  vec_ullong2 r17_v = (vec_ullong2) {0xffffffffffffffffull, 0xffffffffffffffffull};

  // Compound: NaN / normal -- lane results differ
  vec_double2 x18_v = (vec_double2) { nan(""), -3678342.8765343 };
  vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull};

  // Compound: +Inf / -NaN -- neither finite
  vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") };
  vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull};

  // Compound: underflow-to-zero / -Inf -- lane results differ
  vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ;
  vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull};

  // Splat the scalar cases across both vector lanes.
  vec_double2 x0_v = spu_splats(x0);
  vec_ullong2 r0_v = spu_splats(r0);

  vec_double2 x1_v = spu_splats(x1);
  vec_ullong2 r1_v = spu_splats(r1);

  vec_double2 x2_v = spu_splats(x2);
  vec_ullong2 r2_v = spu_splats(r2);

  vec_double2 x3_v = spu_splats(x3);
  vec_ullong2 r3_v = spu_splats(r3);

  vec_double2 x4_v = spu_splats(x4);
  vec_ullong2 r4_v = spu_splats(r4);

  vec_double2 x5_v = spu_splats(x5);
  vec_ullong2 r5_v = spu_splats(r5);

  vec_double2 x6_v = spu_splats(x6);
  vec_ullong2 r6_v = spu_splats(r6);

  vec_double2 x7_v = spu_splats(x7);
  vec_ullong2 r7_v = spu_splats(r7);

  vec_double2 x8_v = spu_splats(x8);
  vec_ullong2 r8_v = spu_splats(r8);

  vec_double2 x9_v = spu_splats(x9);
  vec_ullong2 r9_v = spu_splats(r9);

  vec_double2 x10_v = spu_splats(x10);
  vec_ullong2 r10_v = spu_splats(r10);

  vec_double2 x11_v = spu_splats(x11);
  vec_ullong2 r11_v = spu_splats(r11);

  vec_double2 x12_v = spu_splats(x12);
  vec_ullong2 r12_v = spu_splats(r12);

  vec_double2 x13_v = spu_splats(x13);
  vec_ullong2 r13_v = spu_splats(r13);

  vec_double2 x14_v = spu_splats(x14);
  vec_ullong2 r14_v = spu_splats(r14);

  vec_double2 x15_v = spu_splats(x15);
  vec_ullong2 r15_v = spu_splats(r15);

  vec_ullong2 res_v;

  TEST_START("isfinited2");

  res_v = (vec_ullong2)isfinited2(x0_v);
  TEST_CHECK("20060830000000AAN", allequal_ullong2( res_v, r0_v ), 0);
  res_v = (vec_ullong2)isfinited2(x1_v);
  TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0);
  res_v = (vec_ullong2)isfinited2(x2_v);
  TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, r2_v ), 0);
  res_v = (vec_ullong2)isfinited2(x3_v);
  TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0);
  res_v = (vec_ullong2)isfinited2(x4_v);
  TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0);
  res_v = (vec_ullong2)isfinited2(x5_v);
  TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0);
  res_v = (vec_ullong2)isfinited2(x6_v);
  TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0);
  res_v = (vec_ullong2)isfinited2(x7_v);
  TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0);
  res_v = (vec_ullong2)isfinited2(x8_v);
  TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0);
  res_v = (vec_ullong2)isfinited2(x9_v);
  TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0);
  res_v = (vec_ullong2)isfinited2(x10_v);
  TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0);
  res_v = (vec_ullong2)isfinited2(x11_v);
  TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0);
  res_v = (vec_ullong2)isfinited2(x12_v);
  TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0);
  res_v = (vec_ullong2)isfinited2(x13_v);
  TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0);
  res_v = (vec_ullong2)isfinited2(x14_v);
  TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0);
  res_v = (vec_ullong2)isfinited2(x15_v);
  TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0);
  res_v = (vec_ullong2)isfinited2(x16_v);
  TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0);
  res_v = (vec_ullong2)isfinited2(x17_v);
  TEST_CHECK("20060830000017AAN", allequal_ullong2( res_v, r17_v ), 0);
  res_v = (vec_ullong2)isfinited2(x18_v);
  TEST_CHECK("20060830000018AAN", allequal_ullong2( res_v, r18_v ), 0);
  res_v = (vec_ullong2)isfinited2(x19_v);
  TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0);
  res_v = (vec_ullong2)isfinited2(x20_v);
  TEST_CHECK("20060830000020AAN", allequal_ullong2( res_v, r20_v ), 0);

  TEST_SET_DONE();

  TEST_EXIT();
}
diff --git a/Extras/simdmathlibrary/spu/tests/isfinitef4.c b/Extras/simdmathlibrary/spu/tests/isfinitef4.c
new file mode 100644
index 000000000..4d2e39f8f
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/isfinitef4.c
@@ -0,0 +1,130 @@
+/* Test isfinitef4 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
 */


/* Test isfinitef4 for SPU.

   Exercises the vector-float finiteness predicate isfinitef4().  Every
   case here expects an all-ones (finite) result -- including the
   1.0/0.0 and 1e-999 operands, per the "Smax"/"Smin" comments and the
   r1/r9 expectations below, which reflect SPU single-precision
   semantics (no Inf/NaN representation in SPU floats).  Each scalar is
   splatted into all four lanes and verified with allequal_uint4().

   NOTE(review): the targets of the bare #include lines were lost when
   this patch was re-encoded; the set below is reconstructed from the
   sibling single-precision tests -- confirm against upstream.  */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "simdmath.h"
#include "common-test.h"
#include "testutils.h"

int main()
{
  TEST_SET_START("20060822000000AAN","AAN", "isfinitef4");

  float x0 = hide_float(-0.0f);
  unsigned int r0 = 0xffffffff;

  float x1 = hide_float(-1.0/0.0);  //-Smax
  unsigned int r1 = 0xffffffff;

  float x2 = hide_float(-0.0000000013152f);
  unsigned int r2 = 0xffffffff;

  float x3 = hide_float(-168.97345223013f);
  unsigned int r3 = 0xffffffff;

  float x4 = hide_float(-1e-999);  //-Smin
  unsigned int r4 = 0xffffffff;

  float x5 = hide_float(876543.12345f);
  unsigned int r5 = 0xffffffff;

  float x6 = hide_float( 1e-999);  // Smin
  unsigned int r6 = 0xffffffff;

  float x7 = hide_float(5172.2845321f);
  unsigned int r7 = 0xffffffff;

  float x8 = hide_float(2353705.31415f);
  unsigned int r8 = 0xffffffff;

  float x9 = hide_float( 1.0/0.0);  // Smax
  unsigned int r9 = 0xffffffff;

  // Splat each scalar case across all four vector lanes.
  vec_float4 x0_v = spu_splats(x0);
  vec_uint4 r0_v = spu_splats(r0);

  vec_float4 x1_v = spu_splats(x1);
  vec_uint4 r1_v = spu_splats(r1);

  vec_float4 x2_v = spu_splats(x2);
  vec_uint4 r2_v = spu_splats(r2);

  vec_float4 x3_v = spu_splats(x3);
  vec_uint4 r3_v = spu_splats(r3);

  vec_float4 x4_v = spu_splats(x4);
  vec_uint4 r4_v = spu_splats(r4);

  vec_float4 x5_v = spu_splats(x5);
  vec_uint4 r5_v = spu_splats(r5);

  vec_float4 x6_v = spu_splats(x6);
  vec_uint4 r6_v = spu_splats(r6);

  vec_float4 x7_v = spu_splats(x7);
  vec_uint4 r7_v = spu_splats(r7);

  vec_float4 x8_v = spu_splats(x8);
  vec_uint4 r8_v = spu_splats(r8);

  vec_float4 x9_v = spu_splats(x9);
  vec_uint4 r9_v = spu_splats(r9);

  vec_uint4 res_v;

  TEST_START("isfinitef4");

  res_v = (vec_uint4)isfinitef4(x0_v);
  TEST_CHECK("20060822000000AAN", allequal_uint4( res_v, r0_v ), 0);
  res_v = (vec_uint4)isfinitef4(x1_v);
  TEST_CHECK("20060822000001AAN", allequal_uint4( res_v, r1_v ), 0);
  res_v = (vec_uint4)isfinitef4(x2_v);
  TEST_CHECK("20060822000002AAN", allequal_uint4( res_v, r2_v ), 0);
  res_v = (vec_uint4)isfinitef4(x3_v);
  TEST_CHECK("20060822000003AAN", allequal_uint4( res_v, r3_v ), 0);
  res_v = (vec_uint4)isfinitef4(x4_v);
  TEST_CHECK("20060822000004AAN", allequal_uint4( res_v, r4_v ), 0);
  res_v = (vec_uint4)isfinitef4(x5_v);
  TEST_CHECK("20060822000005AAN", allequal_uint4( res_v, r5_v ), 0);
  res_v = (vec_uint4)isfinitef4(x6_v);
  TEST_CHECK("20060822000006AAN", allequal_uint4( res_v, r6_v ), 0);
  res_v = (vec_uint4)isfinitef4(x7_v);
  TEST_CHECK("20060822000007AAN", allequal_uint4( res_v, r7_v ), 0);
  res_v = (vec_uint4)isfinitef4(x8_v);
  TEST_CHECK("20060822000008AAN", allequal_uint4( res_v, r8_v ), 0);
  res_v = (vec_uint4)isfinitef4(x9_v);
  TEST_CHECK("20060822000009AAN", allequal_uint4( res_v, r9_v ), 0);

  TEST_SET_DONE();

  TEST_EXIT();
}
diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterd2.c b/Extras/simdmathlibrary/spu/tests/isgreaterd2.c
new file mode 100644
index 000000000..420ba53c6
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/tests/isgreaterd2.c
@@ -0,0 +1,271 @@
+/* Test isgreaterd2 for SPU
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "isgreaterd2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0xffffffffffffffffull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0x0000000000000000ull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0xffffffffffffffffull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0x0000000000000000ull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0x0000000000000000ull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0xffffffffffffffffull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0xffffffffffffffffull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0x0000000000000000ull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0x0000000000000000ull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0x0000000000000000ull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); + 
double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0x0000000000000000ull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0xffffffffffffffffull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0x0000000000000000ull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0x0000000000000000ull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0xffffffffffffffffull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0x0000000000000000ull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0xffffffffffffffffull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("isgreaterd2"); + + res_v = (vec_ullong2)isgreaterd2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r3_v ), 
0); + res_v = (vec_ullong2)isgreaterd2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x10_v, y10_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x11_v, y11_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x12_v, y12_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x13_v, y13_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x14_v, y14_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x15_v, y15_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x16_v, y16_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x17_v, y17_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x18_v, y18_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)isgreaterd2(x19_v, y19_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git 
a/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c b/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c new file mode 100644 index 000000000..f83e3db0c --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c @@ -0,0 +1,271 @@ +/* Test isgreaterequald2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "isgreaterequald2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0xffffffffffffffffull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0x0000000000000000ull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0xffffffffffffffffull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0x0000000000000000ull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0xffffffffffffffffull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0xffffffffffffffffull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0xffffffffffffffffull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0xffffffffffffffffull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0xffffffffffffffffull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0xffffffffffffffffull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); 
+ double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0x0000000000000000ull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0xffffffffffffffffull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0xffffffffffffffffull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0x0000000000000000ull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0xffffffffffffffffull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0x0000000000000000ull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0xffffffffffffffffull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("isgreaterequald2"); + + res_v = (vec_ullong2)isgreaterequald2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", 
allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x10_v, y10_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x11_v, y11_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x12_v, y12_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x13_v, y13_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x14_v, y14_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x15_v, y15_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x16_v, y16_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x17_v, y17_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x18_v, y18_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)isgreaterequald2(x19_v, y19_v); + 
TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c b/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c new file mode 100644 index 000000000..197822590 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c @@ -0,0 +1,150 @@ +/* Test isgreaterequalf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060817000000AAN","AAN", "isgreaterequalf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0xffffffff; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0xffffffff; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0xffffffff; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0xffffffff; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0x00000000; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0xffffffff; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0x00000000; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0xffffffff; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = 
spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isgreaterequalf4"); + + res_v = (vec_uint4)isgreaterequalf4(x0_v, y0_v); + TEST_CHECK("20060817000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x1_v, y1_v); + TEST_CHECK("20060817000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x2_v, y2_v); + TEST_CHECK("20060817000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x3_v, y3_v); + TEST_CHECK("20060817000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x4_v, y4_v); + TEST_CHECK("20060817000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x5_v, y5_v); + TEST_CHECK("20060817000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x6_v, y6_v); + TEST_CHECK("20060817000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x7_v, y7_v); + TEST_CHECK("20060817000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x8_v, y8_v); + TEST_CHECK("20060817000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isgreaterequalf4(x9_v, y9_v); + TEST_CHECK("20060817000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterf4.c b/Extras/simdmathlibrary/spu/tests/isgreaterf4.c new file mode 100644 index 000000000..109ae143f --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isgreaterf4.c @@ -0,0 +1,150 @@ +/* 
Test isgreaterf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060817000000AAN","AAN", "isgreaterf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0xffffffff; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0xffffffff; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0x00000000; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0x00000000; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0xffffffff; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = 
spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isgreaterf4"); + + res_v = (vec_uint4)isgreaterf4(x0_v, y0_v); + TEST_CHECK("20060817000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isgreaterf4(x1_v, y1_v); + TEST_CHECK("20060817000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isgreaterf4(x2_v, y2_v); + TEST_CHECK("20060817000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isgreaterf4(x3_v, y3_v); + TEST_CHECK("20060817000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isgreaterf4(x4_v, y4_v); + TEST_CHECK("20060817000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isgreaterf4(x5_v, y5_v); + TEST_CHECK("20060817000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isgreaterf4(x6_v, y6_v); + TEST_CHECK("20060817000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isgreaterf4(x7_v, y7_v); + TEST_CHECK("20060817000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isgreaterf4(x8_v, y8_v); + TEST_CHECK("20060817000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isgreaterf4(x9_v, y9_v); + TEST_CHECK("20060817000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isinfd2.c b/Extras/simdmathlibrary/spu/tests/isinfd2.c new file mode 100644 index 000000000..c31c3baf5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isinfd2.c @@ -0,0 +1,225 @@ +/* Test isinfd2 for SPU + Copyright (C) 2006, 2007 Sony Computer 
Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060830000000AAN","AAN", "isinfd2"); + + // -Nan + double x0 = hide_double(-nan("")); + unsigned long long r0 = 0x0000000000000000ull; + + // -Inf + double x1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0xffffffffffffffffull; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0x0000000000000000ull; + + // -Norm + double x3 = hide_double(-824842.58421394); + unsigned long long r3 = 0x0000000000000000ull; + + // -Dmin + double x4 = hide_double(-DBL_MIN); + unsigned long long r4 = 0x0000000000000000ull; + + // -Denorm + double x5 = hide_double(-2.40e-310); + unsigned long long r5 = 0x0000000000000000ull; + + // -Unf + double x6 = hide_double(-1.0e-999); + unsigned long long r6 = 0x0000000000000000ull; + + // -0 + double x7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + // 0 + double x8 = hide_double( 0.0); + unsigned long long r8 = 0x0000000000000000ull; + + // +Inf + double x9 = hide_double( 1.0e999); + unsigned long long r9 = 0xffffffffffffffffull; + + // +Denorm + double x10 = hide_double( 2.40e-310); + unsigned long long r10 = 0x0000000000000000ull; + + // +Dmin + double x11 = hide_double( DBL_MIN); + unsigned long long r11 = 0x0000000000000000ull; + + // +Norm + double x12 = hide_double(3.14152634); + unsigned long long r12 = 0x0000000000000000ull; + + // +Dmax + double x13 = hide_double(DBL_MAX); + unsigned long long r13 = 0x0000000000000000ull; + + // +Inf + double x14 = hide_double( 1.0/0.0); + unsigned long long r14 = 0xffffffffffffffffull; + + //+Nan + double x15 = hide_double( nan("")); + unsigned long long r15 = 0x0000000000000000ull; + + // Compound + vec_double2 x16_v = (vec_double2) {make_double(0x000AAAAAAAAAAAAAull), -1.0e-999 }; + vec_ullong2 r16_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + // Compound + 
vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 }; + vec_ullong2 r17_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + // Compound + vec_double2 x18_v = (vec_double2) { nan(""), 1.0e999 }; + vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; + + // Compound + vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_ullong2 r19_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; + + // Compound + vec_double2 x20_v = (vec_double2) { -1.0e999, -1.0/0.0} ; + vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0xffffffffffffffffull}; + + vec_double2 x0_v = spu_splats(x0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_ullong2 res_v; + + TEST_START("isinfd2"); + + res_v = (vec_ullong2)isinfd2(x0_v); + TEST_CHECK("20060830000000AAN", 
allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isinfd2(x1_v); + TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isinfd2(x2_v); + TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isinfd2(x3_v); + TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)isinfd2(x4_v); + TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isinfd2(x5_v); + TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isinfd2(x6_v); + TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isinfd2(x7_v); + TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isinfd2(x8_v); + TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isinfd2(x9_v); + TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isinfd2(x10_v); + TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isinfd2(x11_v); + TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isinfd2(x12_v); + TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isinfd2(x13_v); + TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isinfd2(x14_v); + TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isinfd2(x15_v); + TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isinfd2(x16_v); + TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isinfd2(x17_v); + TEST_CHECK("20060830000017AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isinfd2(x18_v); + TEST_CHECK("20060830000018AAN", allequal_ullong2( 
res_v, r18_v ), 0); + res_v = (vec_ullong2)isinfd2(x19_v); + TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0); + res_v = (vec_ullong2)isinfd2(x20_v); + TEST_CHECK("20060830000020AAN", allequal_ullong2( res_v, r20_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isinff4.c b/Extras/simdmathlibrary/spu/tests/isinff4.c new file mode 100644 index 000000000..2e3888d89 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isinff4.c @@ -0,0 +1,130 @@ +/* Test isinff4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060822000000AAN","AAN", "isinff4"); + + float x0 = hide_float(-0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0x00000000; + + float x2 = hide_float(-0.0000000013152f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(876543.12345f); + unsigned int r5 = 0x00000000; + + float x6 = hide_float( 1e-999); // Smin + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(2353705.31415f); + unsigned int r8 = 0x00000000; + + float x9 = hide_float( 1.0/0.0); // Smax + unsigned int r9 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_uint4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = 
spu_splats(x7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isinff4"); + + res_v = (vec_uint4)isinff4(x0_v); + TEST_CHECK("20060822000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isinff4(x1_v); + TEST_CHECK("20060822000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isinff4(x2_v); + TEST_CHECK("20060822000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isinff4(x3_v); + TEST_CHECK("20060822000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isinff4(x4_v); + TEST_CHECK("20060822000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isinff4(x5_v); + TEST_CHECK("20060822000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isinff4(x6_v); + TEST_CHECK("20060822000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isinff4(x7_v); + TEST_CHECK("20060822000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isinff4(x8_v); + TEST_CHECK("20060822000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isinff4(x9_v); + TEST_CHECK("20060822000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessd2.c b/Extras/simdmathlibrary/spu/tests/islessd2.c new file mode 100644 index 000000000..51e800816 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessd2.c @@ -0,0 +1,272 @@ +/* Test islessd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "islessd2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0xffffffffffffffffull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0x0000000000000000ull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0xffffffffffffffffull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0x0000000000000000ull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0x0000000000000000ull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0xffffffffffffffffull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0x0000000000000000ull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0x0000000000000000ull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0x0000000000000000ull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0x0000000000000000ull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); + 
double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0xffffffffffffffffull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0x0000000000000000ull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0x0000000000000000ull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0xffffffffffffffffull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0x0000000000000000ull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0xffffffffffffffffull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0x0000000000000000ull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("islessd2"); + + res_v = (vec_ullong2)islessd2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)islessd2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)islessd2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)islessd2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = 
(vec_ullong2)islessd2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)islessd2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)islessd2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)islessd2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)islessd2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)islessd2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)islessd2(x10_v, y10_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)islessd2(x11_v, y11_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)islessd2(x12_v, y12_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)islessd2(x13_v, y13_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)islessd2(x14_v, y14_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)islessd2(x15_v, y15_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)islessd2(x16_v, y16_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)islessd2(x17_v, y17_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)islessd2(x18_v, y18_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)islessd2(x19_v, y19_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessequald2.c 
b/Extras/simdmathlibrary/spu/tests/islessequald2.c new file mode 100644 index 000000000..d9d958b76 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessequald2.c @@ -0,0 +1,271 @@ +/* Test islessequald2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "islessequald2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0xffffffffffffffffull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0x0000000000000000ull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0xffffffffffffffffull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0xffffffffffffffffull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0x0000000000000000ull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0xffffffffffffffffull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0x0000000000000000ull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0xffffffffffffffffull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0xffffffffffffffffull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0xffffffffffffffffull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); + 
double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0xffffffffffffffffull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0x0000000000000000ull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0xffffffffffffffffull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0xffffffffffffffffull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0x0000000000000000ull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0xffffffffffffffffull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0x0000000000000000ull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("islessequald2"); + + res_v = (vec_ullong2)islessequald2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)islessequald2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)islessequald2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)islessequald2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, 
r3_v ), 0); + res_v = (vec_ullong2)islessequald2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)islessequald2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)islessequald2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)islessequald2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)islessequald2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)islessequald2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)islessequald2(x10_v, y10_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)islessequald2(x11_v, y11_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)islessequald2(x12_v, y12_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)islessequald2(x13_v, y13_v); + TEST_CHECK("20060825000003AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)islessequald2(x14_v, y14_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)islessequald2(x15_v, y15_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)islessequald2(x16_v, y16_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)islessequald2(x17_v, y17_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)islessequald2(x18_v, y18_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)islessequald2(x19_v, y19_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); 
+ + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessequalf4.c b/Extras/simdmathlibrary/spu/tests/islessequalf4.c new file mode 100644 index 000000000..0af521028 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessequalf4.c @@ -0,0 +1,150 @@ +/* Test islessequalf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060816000000AAN","AAN", "islessequalf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0xffffffff; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0x00000000; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0xffffffff; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0xffffffff; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0xffffffff; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0xffffffff; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0xffffffff; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0xffffffff; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = 
spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("islessequalf4"); + + res_v = (vec_uint4)islessequalf4(x0_v, y0_v); + TEST_CHECK("20060816000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)islessequalf4(x1_v, y1_v); + TEST_CHECK("20060816000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)islessequalf4(x2_v, y2_v); + TEST_CHECK("20060816000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)islessequalf4(x3_v, y3_v); + TEST_CHECK("20060816000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)islessequalf4(x4_v, y4_v); + TEST_CHECK("20060816000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)islessequalf4(x5_v, y5_v); + TEST_CHECK("20060816000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)islessequalf4(x6_v, y6_v); + TEST_CHECK("20060816000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)islessequalf4(x7_v, y7_v); + TEST_CHECK("20060816000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)islessequalf4(x8_v, y8_v); + TEST_CHECK("20060816000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)islessequalf4(x9_v, y9_v); + TEST_CHECK("20060816000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessf4.c b/Extras/simdmathlibrary/spu/tests/islessf4.c new file mode 100644 index 000000000..26fcaa22a --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessf4.c @@ -0,0 +1,150 @@ +/* Test islessf4 for SPU + Copyright (C) 2006, 
2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060815000000AAN","AAN", "islessf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0x00000000; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0xffffffff; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0xffffffff; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0xffffffff; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0xffffffff; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = spu_splats(r5); 
+ + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("islessf4"); + + res_v = (vec_uint4)islessf4(x0_v, y0_v); + TEST_CHECK("20060815000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)islessf4(x1_v, y1_v); + TEST_CHECK("20060815000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)islessf4(x2_v, y2_v); + TEST_CHECK("20060815000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)islessf4(x3_v, y3_v); + TEST_CHECK("20060815000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)islessf4(x4_v, y4_v); + TEST_CHECK("20060815000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)islessf4(x5_v, y5_v); + TEST_CHECK("20060815000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)islessf4(x6_v, y6_v); + TEST_CHECK("20060815000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)islessf4(x7_v, y7_v); + TEST_CHECK("20060815000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)islessf4(x8_v, y8_v); + TEST_CHECK("20060815000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)islessf4(x9_v, y9_v); + TEST_CHECK("20060815000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c b/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c new file mode 100644 index 000000000..5d93d2d0b --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c @@ -0,0 +1,271 @@ +/* Test islessgreaterd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060825000000AAN","AAN", "islessgreaterd2"); + + //-QNaN: NG + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0x0000000000000000ull; + + //+Inf > -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0xffffffffffffffffull; + + //-Inf < -Dmax + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0xffffffffffffffffull; + + //-Norm > -Inf + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(-1.0/0.0); + unsigned long long r3 = 0xffffffffffffffffull; + + //-Norm < -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0xffffffffffffffffull; + + //-Norm = -Norm + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-168.97345223013); + unsigned long long r5 = 0x0000000000000000ull; + + //-Norm > -Norm + double x6 = hide_double(-168.97345223013); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0xffffffffffffffffull; + + //-Norm < -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0xffffffffffffffffull; + + //-Unf > -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0xffffffffffffffffull; + + //-Unf = 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0x0000000000000000ull; + + //-0 = 0 + double x10 = hide_double(-0.0); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0x0000000000000000ull; + + //+Unf = 0 + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( 0.0); + unsigned long long r11 = 0x0000000000000000ull; + + //+Unf < +Norm + double x12 = hide_double( 1e-999); 
+ double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0xffffffffffffffffull; + + //+Norm > +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0xffffffffffffffffull; + + //+Norm = +Norm + double x14 = hide_double(5172.2845321); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0x0000000000000000ull; + + //+Norm < +Norm + double x15 = hide_double(264.345643345); + double y15 = hide_double(2353705.31415); + unsigned long long r15 = 0xffffffffffffffffull; + + //+Norm > -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0xffffffffffffffffull; + + //+Norm < +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0xffffffffffffffffull; + + //+Inf > +Dmax + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double(DBL_MAX); + unsigned long long r18 = 0xffffffffffffffffull; + + //+QNaN: NG + double x19 = hide_double(nan("")); + double y19 = hide_double(3.14); + unsigned long long r19 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = 
spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("islessgreaterd2"); + + res_v = (vec_ullong2)islessgreaterd2(x0_v, y0_v); + TEST_CHECK("20060825000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x1_v, y1_v); + TEST_CHECK("20060825000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x2_v, y2_v); + TEST_CHECK("20060825000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x3_v, y3_v); + TEST_CHECK("20060825000003AAN", 
allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x4_v, y4_v); + TEST_CHECK("20060825000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x5_v, y5_v); + TEST_CHECK("20060825000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x6_v, y6_v); + TEST_CHECK("20060825000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x7_v, y7_v); + TEST_CHECK("20060825000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x8_v, y8_v); + TEST_CHECK("20060825000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x9_v, y9_v); + TEST_CHECK("20060825000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x10_v, y10_v); + TEST_CHECK("20060825000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x11_v, y11_v); + TEST_CHECK("20060825000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x12_v, y12_v); + TEST_CHECK("20060825000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x13_v, y13_v); + TEST_CHECK("20060825000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x14_v, y14_v); + TEST_CHECK("20060825000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x15_v, y15_v); + TEST_CHECK("20060825000015AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x16_v, y16_v); + TEST_CHECK("20060825000016AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x17_v, y17_v); + TEST_CHECK("20060825000017AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x18_v, y18_v); + TEST_CHECK("20060825000018AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)islessgreaterd2(x19_v, y19_v); + TEST_CHECK("20060825000019AAN",
allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c b/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c new file mode 100644 index 000000000..51cf06672 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c @@ -0,0 +1,150 @@ +/* Test islessgreaterf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060816000000AAN","AAN", "islessgreaterf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0xffffffff; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0xffffffff; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0xffffffff; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0xffffffff; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0xffffffff; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0xffffffff; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0xffffffff; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = 
spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("islessgreaterf4"); + + res_v = (vec_uint4)islessgreaterf4(x0_v, y0_v); + TEST_CHECK("20060816000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x1_v, y1_v); + TEST_CHECK("20060816000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x2_v, y2_v); + TEST_CHECK("20060816000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x3_v, y3_v); + TEST_CHECK("20060816000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x4_v, y4_v); + TEST_CHECK("20060816000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x5_v, y5_v); + TEST_CHECK("20060816000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x6_v, y6_v); + TEST_CHECK("20060816000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x7_v, y7_v); + TEST_CHECK("20060816000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x8_v, y8_v); + TEST_CHECK("20060816000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)islessgreaterf4(x9_v, y9_v); + TEST_CHECK("20060816000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isnand2.c b/Extras/simdmathlibrary/spu/tests/isnand2.c new file mode 100644 index 000000000..c01b8eef3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isnand2.c @@ -0,0 +1,226 @@ +/* Test isnand2 for SPU + 
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060830000000AAN","AAN", "isnand2"); + + // -QNan + double x0 = hide_double(-nan("")); + unsigned long long r0 = 0xffffffffffffffffull; + + // -Inf + double x1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0x0000000000000000ull; + + // -QNaN + double x3 = hide_double(make_double(0xFFFFFFFFFFFFFFFFull)); + unsigned long long r3 = 0xffffffffffffffffull; + + // -SNaN + double x4 = hide_double(make_double(0xFFF7FFFFFFFFFFFFull)); + unsigned long long r4 = 0xffffffffffffffffull; + + // -Denorm + double x5 = hide_double(-2.40e-310); + unsigned long long r5 = 0x0000000000000000ull; + + // -Unf + double x6 = hide_double(-1.0e-999); + unsigned long long r6 = 0x0000000000000000ull; + + // -0 + double x7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + // 0 + double x8 = hide_double( 0.0); + unsigned long long r8 = 0x0000000000000000ull; + + // +Inf + double x9 = hide_double( 1.0e999); + unsigned long long r9 = 0x0000000000000000ull; + + // +QNaN + double x10 = hide_double(make_double(0x7FFFFFFFFFFFFFFFull)); + unsigned long long r10 = 0xffffffffffffffffull; + + // +Dmin + double x11 = hide_double( DBL_MIN); + unsigned long long r11 = 0x0000000000000000ull; + + // +Norm + double x12 = hide_double(3.14152634); + unsigned long long r12 = 0x0000000000000000ull; + + // +SNaN + double x13 = hide_double(make_double(0x7FF3333333333333ull)); + unsigned long long r13 = 0xffffffffffffffffull; + + // +Inf + double x14 = hide_double( 1.0/0.0); + unsigned long long r14 = 0x0000000000000000ull; + + //+Nan + double x15 = hide_double( nan("")); + unsigned long long r15 = 0xffffffffffffffffull; + + // Compound + vec_double2 x16_v = (vec_double2) {make_double(0xFFF7000000000000ull), -1.0e-999 }; + vec_ullong2 
r16_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; + + // Compound + vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 }; + vec_ullong2 r17_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + // Compound + vec_double2 x18_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_ullong2 r18_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; + + // Compound + vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; + + // Compound + vec_double2 x20_v = (vec_double2) { make_double(0x7FF8000000000000ull), -1.0/0.0} ; + vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; + + vec_double2 x0_v = spu_splats(x0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_ullong2 r15_v = spu_splats(r15); + + 
vec_ullong2 res_v; + + TEST_START("isnand2"); + + res_v = (vec_ullong2)isnand2(x0_v); + TEST_CHECK("20060830000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isnand2(x1_v); + TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isnand2(x2_v); + TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isnand2(x3_v); + TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)isnand2(x4_v); + TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isnand2(x5_v); + TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isnand2(x6_v); + TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isnand2(x7_v); + TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isnand2(x8_v); + TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isnand2(x9_v); + TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isnand2(x10_v); + TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isnand2(x11_v); + TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isnand2(x12_v); + TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isnand2(x13_v); + TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isnand2(x14_v); + TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isnand2(x15_v); + TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isnand2(x16_v); + TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isnand2(x17_v); + TEST_CHECK("20060830000017AAN", 
allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isnand2(x18_v); + TEST_CHECK("20060830000018AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)isnand2(x19_v); + TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0); + res_v = (vec_ullong2)isnand2(x20_v); + TEST_CHECK("20060830000020AAN", allequal_ullong2( res_v, r20_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isnanf4.c b/Extras/simdmathlibrary/spu/tests/isnanf4.c new file mode 100644 index 000000000..0bc6bb30e --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isnanf4.c @@ -0,0 +1,129 @@ +/* Test isnanf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060822000000AAN","AAN", "isnanf4"); + + float x0 = hide_float(-0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0x00000000; + + float x2 = hide_float(-0.0000000013152f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(876543.12345f); + unsigned int r5 = 0x00000000; + + float x6 = hide_float( 1e-999); // Smin + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(2353705.31415f); + unsigned int r8 = 0x00000000; + + float x9 = hide_float( 1.0/0.0); // Smax + unsigned int r9 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_uint4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = 
spu_splats(x7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isnanf4"); + + res_v = (vec_uint4)isnanf4(x0_v); + TEST_CHECK("20060822000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isnanf4(x1_v); + TEST_CHECK("20060822000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isnanf4(x2_v); + TEST_CHECK("20060822000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isnanf4(x3_v); + TEST_CHECK("20060822000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isnanf4(x4_v); + TEST_CHECK("20060822000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isnanf4(x5_v); + TEST_CHECK("20060822000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isnanf4(x6_v); + TEST_CHECK("20060822000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isnanf4(x7_v); + TEST_CHECK("20060822000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isnanf4(x8_v); + TEST_CHECK("20060822000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isnanf4(x9_v); + TEST_CHECK("20060822000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isnormald2.c b/Extras/simdmathlibrary/spu/tests/isnormald2.c new file mode 100644 index 000000000..d2b2d290e --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isnormald2.c @@ -0,0 +1,225 @@ +/* Test isnormald2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060830000000AAN","AAN", "isnormald2"); + + // -Nan + double x0 = hide_double(-nan("")); + unsigned long long r0 = 0x0000000000000000ull; + + // -Inf + double x1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + // -Dmax + double x2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0xffffffffffffffffull; + + // -Norm + double x3 = hide_double(-824842.58421394); + unsigned long long r3 = 0xffffffffffffffffull; + + // -Dmin + double x4 = hide_double(-DBL_MIN); + unsigned long long r4 = 0xffffffffffffffffull; + + // -Denorm + double x5 = hide_double(-2.40e-310); + unsigned long long r5 = 0x0000000000000000ull; + + // -Unf + double x6 = hide_double(-1.0e-999); + unsigned long long r6 = 0x0000000000000000ull; + + // -0 + double x7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + // 0 + double x8 = hide_double( 0.0); + unsigned long long r8 = 0x0000000000000000ull; + + // +Unf + double x9 = hide_double( 1.0e-999); + unsigned long long r9 = 0x0000000000000000ull; + + // +Denorm + double x10 = hide_double( 2.40e-310); + unsigned long long r10 = 0x0000000000000000ull; + + // +Dmin + double x11 = hide_double( DBL_MIN); + unsigned long long r11 = 0xffffffffffffffffull; + + // +Norm + double x12 = hide_double(3.14152634); + unsigned long long r12 = 0xffffffffffffffffull; + + // +Dmax + double x13 = hide_double(DBL_MAX); + unsigned long long r13 = 0xffffffffffffffffull; + + // +Inf + double x14 = hide_double( 1.0/0.0); + unsigned long long r14 = 0x0000000000000000ull; + + //+Nan + double x15 = hide_double( nan("")); + unsigned long long r15 = 0x0000000000000000ull; + + // Compound + vec_double2 x16_v = (vec_double2) {make_double(0x000AAAAAAAAAAAAAull), -1.0e-999 }; + vec_ullong2 r16_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + // Compound + 
vec_double2 x17_v = (vec_double2) { 345.27533, -2.40e-310 }; + vec_ullong2 r17_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; + + // Compound + vec_double2 x18_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; + + // Compound + vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + // Compound + vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ; + vec_ullong2 r20_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; + + vec_double2 x0_v = spu_splats(x0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_ullong2 res_v; + + TEST_START("isnormald2"); + + res_v = (vec_ullong2)isnormald2(x0_v); + 
TEST_CHECK("20060830000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isnormald2(x1_v); + TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isnormald2(x2_v); + TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)isnormald2(x3_v); + TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)isnormald2(x4_v); + TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isnormald2(x5_v); + TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isnormald2(x6_v); + TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isnormald2(x7_v); + TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isnormald2(x8_v); + TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isnormald2(x9_v); + TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isnormald2(x10_v); + TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isnormald2(x11_v); + TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isnormald2(x12_v); + TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isnormald2(x13_v); + TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isnormald2(x14_v); + TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isnormald2(x15_v); + TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isnormald2(x16_v); + TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isnormald2(x17_v); + TEST_CHECK("20060830000017AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = 
(vec_ullong2)isnormald2(x18_v); + TEST_CHECK("20060830000018AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = (vec_ullong2)isnormald2(x19_v); + TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0); + res_v = (vec_ullong2)isnormald2(x20_v); + TEST_CHECK("20060830000020AAN", allequal_ullong2( res_v, r20_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isnormalf4.c b/Extras/simdmathlibrary/spu/tests/isnormalf4.c new file mode 100644 index 000000000..529c53c0a --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isnormalf4.c @@ -0,0 +1,130 @@ +/* Test isnormalf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060822000000AAN","AAN", "isnormalf4"); + + float x0 = hide_float(-0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0xffffffff; + + float x2 = hide_float( 0.0f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + unsigned int r3 = 0xffffffff; + + float x4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(876543.12345f); + unsigned int r5 = 0xffffffff; + + float x6 = hide_float( 1e-999); // Smin + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + unsigned int r7 = 0xffffffff; + + float x8 = hide_float(2353705.31415f); + unsigned int r8 = 0xffffffff; + + float x9 = hide_float( 1.0/0.0); // Smax + unsigned int r9 = 0xffffffff; + + vec_float4 x0_v = spu_splats(x0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_uint4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = 
spu_splats(x7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isnormalf4"); + + res_v = (vec_uint4)isnormalf4(x0_v); + TEST_CHECK("20060822000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isnormalf4(x1_v); + TEST_CHECK("20060822000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isnormalf4(x2_v); + TEST_CHECK("20060822000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isnormalf4(x3_v); + TEST_CHECK("20060822000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isnormalf4(x4_v); + TEST_CHECK("20060822000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isnormalf4(x5_v); + TEST_CHECK("20060822000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isnormalf4(x6_v); + TEST_CHECK("20060822000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isnormalf4(x7_v); + TEST_CHECK("20060822000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isnormalf4(x8_v); + TEST_CHECK("20060822000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isnormalf4(x9_v); + TEST_CHECK("20060822000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isunorderedd2.c b/Extras/simdmathlibrary/spu/tests/isunorderedd2.c new file mode 100644 index 000000000..935947e9d --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isunorderedd2.c @@ -0,0 +1,271 @@ +/* Test isunorderedd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060830000000AAN","AAN", "isunorderedd2"); + + //-QNaN, Norm + double x0 = hide_double(-nan("")); + double y0 = hide_double(1.0); + unsigned long long r0 = 0xffffffffffffffffull; + + //+Inf, -Inf + double x1 = hide_double( 1.0/0.0); + double y1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0x0000000000000000ull; + + //-Inf, -QNaN + double x2 = hide_double(-1.0/0.0); + double y2 = hide_double(make_double(0xFFFFFFFFFFFFFFFFull)); + unsigned long long r2 = 0xffffffffffffffffull; + + //-Norm, -SNaN + double x3 = hide_double(-67418234.34256245); + double y3 = hide_double(make_double(0xFFF7FFFFFFFFFFFFull)); + unsigned long long r3 = 0xffffffffffffffffull; + + //-Norm, -Denorm + double x4 = hide_double(-273453.3234458053); + double y4 = hide_double(-3.0e-321); + unsigned long long r4 = 0x0000000000000000ull; + + //-Norm, -Inf + double x5 = hide_double(-168.97345223013); + double y5 = hide_double(-1.0/0.0); + unsigned long long r5 = 0x0000000000000000ull; + + //-QNaN, -Norm + double x6 = hide_double(-nan("")); + double y6 = hide_double(-21345853556.492); + unsigned long long r6 = 0xffffffffffffffffull; + + //-Norm, -0 + double x7 = hide_double(-168.97345223013); + double y7 = hide_double(-0.0); + unsigned long long r7 = 0x0000000000000000ull; + + //-Unf, -Norm + double x8 = hide_double(-1.0e-999); + double y8 = hide_double(-83532.96153153); + unsigned long long r8 = 0x0000000000000000ull; + + //-Unf, 0 + double x9 = hide_double(-1.0e-999); + double y9 = hide_double(0.0); + unsigned long long r9 = 0x0000000000000000ull; + + //QNaN, 0 + double x10 = hide_double(make_double(0x7FFFFFFFFFFFFFFFull)); + double y10 = hide_double( 0.0); + unsigned long long r10 = 0xffffffffffffffffull; + + //+Unf, +QNaN + double x11 = hide_double( 1.0e-999); + double y11 = hide_double( nan("")); + unsigned long long r11 = 
0xffffffffffffffffull; + + //+Unf, +Norm + double x12 = hide_double( 1e-999); + double y12 = hide_double(0.0031529324); + unsigned long long r12 = 0x0000000000000000ull; + + //+Norm, +Denorm + double x13 = hide_double(5172.2845321); + double y13 = hide_double(3.0e-321); + unsigned long long r13 = 0x0000000000000000ull; + + //+SNaN, +Norm + double x14 = hide_double(make_double(0x7FF3333333333333ull)); + double y14 = hide_double(5172.2845321); + unsigned long long r14 = 0xffffffffffffffffull; + + //+Norm, +QNaN + double x15 = hide_double(264.345643345); + double y15 = hide_double(make_double(0x7FFAAAAAAAAAAAAAull)); + unsigned long long r15 = 0xffffffffffffffffull; + + //+Norm, -Norm + double x16 = hide_double( 926.605118542); + double y16 = hide_double(-9.43574552184); + unsigned long long r16 = 0x0000000000000000ull; + + //+Norm, +Dmax + double x17 = hide_double( 926.605118542); + double y17 = hide_double(DBL_MAX); + unsigned long long r17 = 0x0000000000000000ull; + + //+Inf, +Ovf + double x18 = hide_double( 1.0/0.0); + double y18 = hide_double( 1.0e999); + unsigned long long r18 = 0x0000000000000000ull; + + //+Inf, +QNaN + double x19 = hide_double( 1.0/0.0); + double y19 = hide_double(nan("")); + unsigned long long r19 = 0xffffffffffffffffull; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 y0_v = spu_splats(y0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_double2 y1_v = spu_splats(y1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 y2_v = spu_splats(y2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 y3_v = spu_splats(y3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 y4_v = spu_splats(y4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 y5_v = spu_splats(y5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 
y6_v = spu_splats(y6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 y7_v = spu_splats(y7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 y8_v = spu_splats(y8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 y9_v = spu_splats(y9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 y10_v = spu_splats(y10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 y11_v = spu_splats(y11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 y12_v = spu_splats(y12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 y13_v = spu_splats(y13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 y14_v = spu_splats(y14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 y15_v = spu_splats(y15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 y16_v = spu_splats(y16); + vec_ullong2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 y17_v = spu_splats(y17); + vec_ullong2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 y18_v = spu_splats(y18); + vec_ullong2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 y19_v = spu_splats(y19); + vec_ullong2 r19_v = spu_splats(r19); + + vec_ullong2 res_v; + + TEST_START("isunorderedd2"); + + res_v = (vec_ullong2)isunorderedd2(x0_v, y0_v); + TEST_CHECK("20060830000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x1_v, y1_v); + TEST_CHECK("20060830000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x2_v, y2_v); + TEST_CHECK("20060830000002AAN", allequal_ullong2( res_v, 
r2_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x3_v, y3_v); + TEST_CHECK("20060830000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x4_v, y4_v); + TEST_CHECK("20060830000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x5_v, y5_v); + TEST_CHECK("20060830000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x6_v, y6_v); + TEST_CHECK("20060830000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x7_v, y7_v); + TEST_CHECK("20060830000007AAN", allequal_ullong2( res_v, r7_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x8_v, y8_v); + TEST_CHECK("20060830000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x9_v, y9_v); + TEST_CHECK("20060830000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x10_v, y10_v); + TEST_CHECK("20060830000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x11_v, y11_v); + TEST_CHECK("20060830000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x12_v, y12_v); + TEST_CHECK("20060830000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x13_v, y13_v); + TEST_CHECK("20060830000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x14_v, y14_v); + TEST_CHECK("20060830000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x15_v, y15_v); + TEST_CHECK("20060830000015AAN", allequal_ullong2( res_v, r15_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x16_v, y16_v); + TEST_CHECK("20060830000016AAN", allequal_ullong2( res_v, r16_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x17_v, y17_v); + TEST_CHECK("20060830000017AAN", allequal_ullong2( res_v, r17_v ), 0); + res_v = (vec_ullong2)isunorderedd2(x18_v, y18_v); + TEST_CHECK("20060830000018AAN", allequal_ullong2( res_v, r18_v ), 0); + res_v = 
(vec_ullong2)isunorderedd2(x19_v, y19_v); + TEST_CHECK("20060830000019AAN", allequal_ullong2( res_v, r19_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/isunorderedf4.c b/Extras/simdmathlibrary/spu/tests/isunorderedf4.c new file mode 100644 index 000000000..4ba85a6a5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/isunorderedf4.c @@ -0,0 +1,150 @@ +/* Test isunorderedf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060822000000AAN","AAN", "isunorderedf4"); + + float x0 = hide_float(-0.0f); + float y0 = hide_float( 0.0f); + unsigned int r0 = 0x00000000; + + float x1 = hide_float( 1.0/0.0); //+Smax + float y1 = hide_float(-1.0/0.0); //-Smax + unsigned int r1 = 0x00000000; + + float x2 = hide_float(-0.0000000013152f); + float y2 = hide_float(-234245.85323441f); + unsigned int r2 = 0x00000000; + + float x3 = hide_float(-168.97345223013f); + float y3 = hide_float(-168.97345223013f); + unsigned int r3 = 0x00000000; + + float x4 = hide_float(-83532.96153153f); + float y4 = hide_float(-1e-999); //-Smin + unsigned int r4 = 0x00000000; + + float x5 = hide_float(-321.01234567f); + float y5 = hide_float(876543.12345f); + unsigned int r5 = 0x00000000; + + float x6 = hide_float( 1e-999); // Smin + float y6 = hide_float(0.0031529324f); + unsigned int r6 = 0x00000000; + + float x7 = hide_float(5172.2845321f); + float y7 = hide_float(5172.2845321f); + unsigned int r7 = 0x00000000; + + float x8 = hide_float(264.345643345f); + float y8 = hide_float(2353705.31415f); + unsigned int r8 = 0x00000000; + + float x9 = hide_float( 1.0/0.0); // Smax + float y9 = hide_float(9.43574552184f); + unsigned int r9 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 y0_v = spu_splats(y0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 y1_v = spu_splats(y1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 y2_v = spu_splats(y2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 y3_v = spu_splats(y3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 y4_v = spu_splats(y4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 y5_v = spu_splats(y5); + vec_uint4 r5_v = 
spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 y6_v = spu_splats(y6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 y7_v = spu_splats(y7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 y8_v = spu_splats(y8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 y9_v = spu_splats(y9); + vec_uint4 r9_v = spu_splats(r9); + + vec_uint4 res_v; + + TEST_START("isunorderedf4"); + + res_v = (vec_uint4)isunorderedf4(x0_v, y0_v); + TEST_CHECK("20060822000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)isunorderedf4(x1_v, y1_v); + TEST_CHECK("20060822000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)isunorderedf4(x2_v, y2_v); + TEST_CHECK("20060822000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)isunorderedf4(x3_v, y3_v); + TEST_CHECK("20060822000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)isunorderedf4(x4_v, y4_v); + TEST_CHECK("20060822000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)isunorderedf4(x5_v, y5_v); + TEST_CHECK("20060822000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)isunorderedf4(x6_v, y6_v); + TEST_CHECK("20060822000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)isunorderedf4(x7_v, y7_v); + TEST_CHECK("20060822000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)isunorderedf4(x8_v, y8_v); + TEST_CHECK("20060822000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)isunorderedf4(x9_v, y9_v); + TEST_CHECK("20060822000009AAN", allequal_uint4( res_v, r9_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/ldexpd2.c b/Extras/simdmathlibrary/spu/tests/ldexpd2.c new file mode 100644 index 000000000..bb959829d --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/ldexpd2.c @@ -0,0 +1,250 @@ +/* Test ldexpd2 for SPU + Copyright (C) 2006, 
2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ ldexpd2 - Multiply Double by 2 Raised to its Power + * For large elements of ex (overflow), returns HUGE_VALF + * For small elements of ex (underflow), returns 0. + * + *@brief + * boundary test for ldexpd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. 
+ * + *@note + * + * + **/ + + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#undef LDEXPD2_ROUND + + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int exp[2]; + unsigned long long int ans0[2]; + unsigned long long int ans1[2]; + unsigned long long int ans2[2]; + unsigned long long int ans3[2]; +} TestVec64_Ldexp; + +int main() +{ + TestVec64_Ldexp test_a[] = { + { + // zero + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000400ULL,0xFFFFFFFFFFFFFC00ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // MIN , MAX + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL} + },{ +#ifdef LDEXPD2_ROUND + // denotmalized + {0x8000000000000003ULL,0x0000000000000003ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x8000000000000002ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000002ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000002ULL,0x0000000000000002ULL} + },{ + // denotmalized -54 + {0x0010000000000001ULL,0x8010000000000001ULL}, + {0xFFFFFFFFFFFFFFCAULL,0xFFFFFFFFFFFFFFCAULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + 
{0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFFFFFFF7CEULL,0xFFFFFFFFFFFFF7CEULL}, + {0x0000000000000001ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000001ULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFFFFFFF7CDULL,0xFFFFFFFFFFFFF7CDULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ +#else // LDEXPD2_ROUND + // denotmalized + {0x8000000000000003ULL,0x0000000000000003ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL} + },{ + +#endif // LDEXPD2_ROUND + // denotmalized + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL} + },{ + // denotmalized + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL} + },{ + // 1.0 + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x00000000000003ffULL,0x00000000000003ffULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + 
{0x7FE0000000000000ULL,0xFFE0000000000000ULL} + },{ + // 1.0 -> max + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x0000000000000400ULL,0x0000000000000400ULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFF00000000ULL,0xFFFFFFFF00000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // min-> + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + },{ + // NaN , -NaN + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060905160000NM","NM", "ldexpd2"); + + TEST_START("ldexpd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + spu_mtfpscr(((vec_uint4){0x0100,0,0,0})); + res_v = ldexpd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090516%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans0[0])), 0); + +#ifdef LDEXPD2_ROUND + + 
spu_mtfpscr(((vec_uint4){0x0600,0,0,0})); + res_v = ldexpd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090516%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans1[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0b00,0,0,0})); + res_v = ldexpd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090516%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans2[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0c00,0,0,0})); + res_v = ldexpd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090516%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans3[0])), 0); +#endif // LDEXPD2_ROUND + + } + + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/llabsi2.c b/Extras/simdmathlibrary/spu/tests/llabsi2.c new file mode 100644 index 000000000..3b08ab15f --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/llabsi2.c @@ -0,0 +1,94 @@ +/* Test llabsi2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ llabsi2 - returns absolute value of input. + * + *@brief + * boundary test for llabsi2. + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + * + **/ + + +#include +#include +#include +//#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20060831134500NM","NM", "llabsi2"); + + vec_llong2 x0_v = ((vec_llong2){ 0, 0}); + vec_llong2 r0_v = ((vec_llong2){ 0, 0}); + + vec_llong2 x1_v = ((vec_llong2){-1, 1}); + vec_llong2 r1_v = ((vec_llong2){ 1, 1}); + + vec_llong2 x2_v = ((vec_llong2){ 1,-1}); + vec_llong2 r2_v = ((vec_llong2){ 1, 1}); + // 0x7FFFFFFFFFFFFFFF + vec_llong2 x3_v = ((vec_llong2){ 9223372036854775807LL,-9223372036854775807LL}); + vec_llong2 r3_v = ((vec_llong2){ 9223372036854775807LL, 9223372036854775807LL}); + // 0x8000000000000000 + vec_llong2 x4_v = ((vec_llong2){0x8000000000000000LL,0x8000000000000000LL}); + vec_llong2 r4_v = ((vec_llong2){0x8000000000000000LL,0x8000000000000000LL}); + + vec_llong2 res_v; + + TEST_START("llabsi2"); + res_v = llabsi2 (x0_v); + TEST_CHECK("20060831134501NM", allequal_llong2( res_v, r0_v ), 0); + res_v = 
llabsi2 (x1_v); + TEST_CHECK("20060831134502NM", allequal_llong2( res_v, r1_v ), 0); + res_v = llabsi2 (x2_v); + TEST_CHECK("20060831134503NM", allequal_llong2( res_v, r2_v ), 0); + res_v = llabsi2 (x3_v); + TEST_CHECK("20060831134504NM", allequal_llong2( res_v, r3_v ), 0); + res_v = llabsi2 (x4_v); + TEST_CHECK("20060831134505NM", allequal_llong2( res_v, r4_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/lldivi2.c b/Extras/simdmathlibrary/spu/tests/lldivi2.c new file mode 100644 index 000000000..2c0b9e32b --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/lldivi2.c @@ -0,0 +1,126 @@ +/* Test lldivi2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060908152000MH","MH", "lldivi2"); + + signed long long x0n = 0x0c0e84c75f216c43ll; + signed long long x0d = 0x00000000000abcdell; + signed long long x0q = 0x0000011f71fb04cbll; + signed long long x0r = 0x0000000000003039ll; + signed long long x1n = 0x0c0e84c75f216c43ll; + signed long long x1d = 0x0000011f71fb04cbll; + signed long long x1q = 0x00000000000abcdell; + signed long long x1r = 0x0000000000003039ll; + + signed long long x2n = 0x08e732f9d4baf903ll; + signed long long x2d = 0x0000000000976bb6ll; + signed long long x2q = 0x0000000f0d55f4d9ll; + signed long long x2r = 0x00000000004933bdll; + signed long long x3n = 0x08e732f9d4baf903ll; + signed long long x3d = 0x0000000f0d55f4d9ll; + signed long long x3q = 0x0000000000976bb6ll; + signed long long x3r = 0x00000000004933bdll; + + signed long long x4n = 0xffffffffffffffffll; + signed long long x4d = 0xfffffffffffffffell; + signed long long x4q = 0x0000000000000000ll; + signed long long x4r = 0xffffffffffffffffll; + signed long long x5n = 0xffffffffffffffffll; + signed long long x5d = 0x0000000000000001ll; + signed long long x5q = 0xffffffffffffffffll; + signed long long x5r = 0x0000000000000000ll; + + signed long long x6n = 0xffffffffffffffffll; + signed long long x6d = 0x0000000000000002ll; + signed long long x6q = 0x0000000000000000ll; + signed 
long long x6r = 0xffffffffffffffffll; + signed long long x7n = 0xffffffffffffffffll; + signed long long x7d = 0x7fffffffffffffffll; + signed long long x7q = 0x0000000000000000ll; + signed long long x7r = 0xffffffffffffffffll; + + signed long long x8n = 0xf8c0d45d0ff344f0ll; + signed long long x8d = 0x000019aa3e41e0bdll; + signed long long x8q = 0xffffffffffffb7b8ll; + signed long long x8r = 0xffffedc119afa218ll; + signed long long x9n = 0xf8c0d45d0ff344f0ll; + signed long long x9d = 0x000000000009b13bll; + signed long long x9q = 0xffffff4097efb39ell; + signed long long x9r = 0xfffffffffff6a186ll; + + vec_llong2 x0n_v = (vec_llong2){ x0n, x1n }; + vec_llong2 x0d_v = (vec_llong2){ x0d, x1d }; + vec_llong2 x0q_v = (vec_llong2){ x0q, x1q }; + vec_llong2 x0r_v = (vec_llong2){ x0r, x1r }; + vec_llong2 x1n_v = (vec_llong2){ x2n, x3n }; + vec_llong2 x1d_v = (vec_llong2){ x2d, x3d }; + vec_llong2 x1q_v = (vec_llong2){ x2q, x3q }; + vec_llong2 x1r_v = (vec_llong2){ x2r, x3r }; + vec_llong2 x2n_v = (vec_llong2){ x4n, x5n }; + vec_llong2 x2d_v = (vec_llong2){ x4d, x5d }; + vec_llong2 x2q_v = (vec_llong2){ x4q, x5q }; + vec_llong2 x2r_v = (vec_llong2){ x4r, x5r }; + vec_llong2 x3n_v = (vec_llong2){ x6n, x7n }; + vec_llong2 x3d_v = (vec_llong2){ x6d, x7d }; + vec_llong2 x3q_v = (vec_llong2){ x6q, x7q }; + vec_llong2 x3r_v = (vec_llong2){ x6r, x7r }; + vec_llong2 x4n_v = (vec_llong2){ x8n, x9n }; + vec_llong2 x4d_v = (vec_llong2){ x8d, x9d }; + vec_llong2 x4q_v = (vec_llong2){ x8q, x9q }; + vec_llong2 x4r_v = (vec_llong2){ x8r, x9r }; + + lldivi2_t res; + + TEST_START("lldivi2"); + res = lldivi2(x0n_v, x0d_v); + TEST_CHECK("20060908152001MH", allequal_llong2( res.quot, x0q_v ) && allequal_llong2( res.rem, x0r_v ), 0); + res = lldivi2(x1n_v, x1d_v); + TEST_CHECK("20060908152002MH", allequal_llong2( res.quot, x1q_v ) && allequal_llong2( res.rem, x1r_v ), 0); + res = lldivi2(x2n_v, x2d_v); + TEST_CHECK("20060908152003MH", allequal_llong2( res.quot, x2q_v ) && allequal_llong2( 
res.rem, x2r_v ), 0); + res = lldivi2(x3n_v, x3d_v); + TEST_CHECK("20060908152004MH", allequal_llong2( res.quot, x3q_v ) && allequal_llong2( res.rem, x3r_v ), 0); + res = lldivi2(x4n_v, x4d_v); + TEST_CHECK("20060908152005MH", allequal_llong2( res.quot, x4q_v ) && allequal_llong2( res.rem, x4r_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/lldivu2.c b/Extras/simdmathlibrary/spu/tests/lldivu2.c new file mode 100644 index 000000000..813b2d260 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/lldivu2.c @@ -0,0 +1,125 @@ +/* Test lldivu2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060904200000MH","MH", "lldivu2"); + + unsigned long long x0n = 0x0c0e84c75f216c43ull; + unsigned long long x0d = 0x00000000000abcdeull; + unsigned long long x0q = 0x0000011f71fb04cbull; + unsigned long long x0r = 0x0000000000003039ull; + unsigned long long x1n = 0x0c0e84c75f216c43ull; + unsigned long long x1d = 0x0000011f71fb04cbull; + unsigned long long x1q = 0x00000000000abcdeull; + unsigned long long x1r = 0x0000000000003039ull; + + unsigned long long x2n = 0x08e732f9d4baf903ull; + unsigned long long x2d = 0x0000000000976bb6ull; + unsigned long long x2q = 0x0000000f0d55f4d9ull; + unsigned long long x2r = 0x00000000004933bdull; + unsigned long long x3n = 0x08e732f9d4baf903ull; + unsigned long long x3d = 0x0000000f0d55f4d9ull; + unsigned long long x3q = 0x0000000000976bb6ull; + unsigned long long x3r = 0x00000000004933bdull; + + unsigned long long x4n = 0xffffffffffffffffull; + unsigned long long x4d = 0xfffffffffffffffeull; + unsigned long long x4q = 0x0000000000000001ull; + unsigned long long x4r = 0x0000000000000001ull; + unsigned long long x5n = 0xffffffffffffffffull; + unsigned long long x5d = 0x0000000000000001ull; + unsigned long long x5q = 0xffffffffffffffffull; + unsigned long long x5r = 0x0000000000000000ull; + + unsigned long long x6n = 0xffffffffffffffffull; + unsigned long long x6d = 
0x0000000000000002ull; + unsigned long long x6q = 0x7fffffffffffffffull; + unsigned long long x6r = 0x0000000000000001ull; + unsigned long long x7n = 0xffffffffffffffffull; + unsigned long long x7d = 0x7fffffffffffffffull; + unsigned long long x7q = 0x0000000000000002ull; + unsigned long long x7r = 0x0000000000000001ull; + + unsigned long long x8n = 0xf8c0d45d0ff344f0ull; + unsigned long long x8d = 0x000019aa3e41e0bdull; + unsigned long long x8q = 0x000000000009b13bull; + unsigned long long x8r = 0x000004c443bccc61ull; + unsigned long long x9n = 0xf8c0d45d0ff344f0ull; + unsigned long long x9d = 0x000000000009b13bull; + unsigned long long x9q = 0x000019aa3ebfc739ull; + unsigned long long x9r = 0x000000000000f1cdull; + + vec_ullong2 x0n_v = (vec_ullong2){ x0n, x1n }; + vec_ullong2 x0d_v = (vec_ullong2){ x0d, x1d }; + vec_ullong2 x0q_v = (vec_ullong2){ x0q, x1q }; + vec_ullong2 x0r_v = (vec_ullong2){ x0r, x1r }; + vec_ullong2 x1n_v = (vec_ullong2){ x2n, x3n }; + vec_ullong2 x1d_v = (vec_ullong2){ x2d, x3d }; + vec_ullong2 x1q_v = (vec_ullong2){ x2q, x3q }; + vec_ullong2 x1r_v = (vec_ullong2){ x2r, x3r }; + vec_ullong2 x2n_v = (vec_ullong2){ x4n, x5n }; + vec_ullong2 x2d_v = (vec_ullong2){ x4d, x5d }; + vec_ullong2 x2q_v = (vec_ullong2){ x4q, x5q }; + vec_ullong2 x2r_v = (vec_ullong2){ x4r, x5r }; + vec_ullong2 x3n_v = (vec_ullong2){ x6n, x7n }; + vec_ullong2 x3d_v = (vec_ullong2){ x6d, x7d }; + vec_ullong2 x3q_v = (vec_ullong2){ x6q, x7q }; + vec_ullong2 x3r_v = (vec_ullong2){ x6r, x7r }; + vec_ullong2 x4n_v = (vec_ullong2){ x8n, x9n }; + vec_ullong2 x4d_v = (vec_ullong2){ x8d, x9d }; + vec_ullong2 x4q_v = (vec_ullong2){ x8q, x9q }; + vec_ullong2 x4r_v = (vec_ullong2){ x8r, x9r }; + + lldivu2_t res; + + TEST_START("lldivu2"); + res = lldivu2(x0n_v, x0d_v); + TEST_CHECK("20060904200001MH", allequal_ullong2( res.quot, x0q_v ) && allequal_ullong2( res.rem, x0r_v ), 0); + res = lldivu2(x1n_v, x1d_v); + TEST_CHECK("20060904200002MH", allequal_ullong2( res.quot, x1q_v ) && 
allequal_ullong2( res.rem, x1r_v ), 0); + res = lldivu2(x2n_v, x2d_v); + TEST_CHECK("20060904200003MH", allequal_ullong2( res.quot, x2q_v ) && allequal_ullong2( res.rem, x2r_v ), 0); + res = lldivu2(x3n_v, x3d_v); + TEST_CHECK("20060904200004MH", allequal_ullong2( res.quot, x3q_v ) && allequal_ullong2( res.rem, x3r_v ), 0); + res = lldivu2(x4n_v, x4d_v); + TEST_CHECK("20060904200005MH", allequal_ullong2( res.quot, x4q_v ) && allequal_ullong2( res.rem, x4r_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/llrintd2.c b/Extras/simdmathlibrary/spu/tests/llrintd2.c new file mode 100644 index 000000000..277688d5f --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/llrintd2.c @@ -0,0 +1,215 @@ +/* Test llrintd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ llrintd2 - rounds two doubles in to two nearest 64bit integer. + * + *@brief + * boundary test for llrintd2. + * consistent with the current rounding mode. + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + **/ + + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + typedef union { + unsigned long long int ll; + double x; + } SrcType64; + SrcType64 tmp64; + TEST_SET_START("20060921101000NM","NM", "llrintd2"); + +// unsigned long long i11 = 0x7FFFFFFFFFFFFDFFull; //limit +// unsigned long long i12 = 0xFFFFFFFFFFFFFDFFull; //limit + +// double x0 = hide_double(-1.0/0.0); // -Inf +// double x1 = hide_double(1.0/0.0); // Inf + double x2 = hide_double(0.0); // +0 + double x3 = hide_double(-0.0); // -0 +// double x4 = hide_double(0.0/0.0); // NaN -> NaN + double x5 = hide_double( 0.5); + double x6 = hide_double(-0.5); + double x7 = hide_double( 0.4999999999999999); // 0 + double x8 = hide_double(-0.4999999999999999); // 0 + double x9 = hide_double( 999999999999999.5); // 1000000000000000 + double x10 = hide_double(-999999999999999.5); //-1000000000000000 +// double x11 = hide_double(make_double(i11)); // 9223372036854774784 +// double x12 = hide_double(make_double(i12)); //-9223372036854774784 + double x11 = hide_double(9223372036854774784ll); // 9223372036854774784 + double x12 = 
hide_double(-9223372036854774784ll); //-9223372036854774784 + double x13 = DBL_MIN; + double x14 = (0.0 - DBL_MIN); + + double x15 = hide_double( 0.5000000000000001); + double x16 = hide_double(-0.000001); + + tmp64.ll = 0x3FEFFFFFFFFFFFFFULL; + double x17 = tmp64.x; + tmp64.ll = 0xC32FFFFFFFFFFFFFULL; + double x18 = tmp64.x; + + vec_double2 x0_v = ((vec_double2){x2, x3}); //+0,-0 + vec_llong2 r0_v = ((vec_llong2){0,0}); // 0, 0 + vec_double2 x1_v = ((vec_double2){x5, x8}); //+0.5,-0.4999999999999999 + vec_llong2 r10_v = ((vec_llong2){0,0}); + vec_llong2 r11_v = ((vec_llong2){0,0}); + vec_llong2 r12_v = ((vec_llong2){1,0}); + vec_llong2 r13_v = ((vec_llong2){0,-1}); + vec_double2 x2_v = ((vec_double2){x7, x6}); //+0.4999999999999999, -0.5 + vec_llong2 r20_v = ((vec_llong2){0,0}); + vec_llong2 r21_v = ((vec_llong2){0,0}); + vec_llong2 r22_v = ((vec_llong2){1,0}); + vec_llong2 r23_v = ((vec_llong2){0,-1}); + vec_double2 x3_v = ((vec_double2){x10, x11}); //-999999999999999.5, 9223372036854774784 + vec_llong2 r30_v = ((vec_llong2){-1000000000000000ll,9223372036854774784ll}); + vec_llong2 r31_v = ((vec_llong2){-999999999999999ll, 9223372036854774784ll}); + vec_llong2 r32_v = ((vec_llong2){-999999999999999ll, 9223372036854774784ll}); + vec_llong2 r33_v = ((vec_llong2){-1000000000000000ll,9223372036854774784ll}); + vec_double2 x4_v = ((vec_double2){x12, x9}); //-9223372036854774784, 999999999999999.5 + vec_llong2 r40_v = ((vec_llong2){-9223372036854774784ll,1000000000000000ll}); + vec_llong2 r41_v = ((vec_llong2){-9223372036854774784ll,999999999999999ll}); + vec_llong2 r42_v = ((vec_llong2){-9223372036854774784ll,1000000000000000ll}); + vec_llong2 r43_v = ((vec_llong2){-9223372036854774784ll,999999999999999ll}); + vec_double2 x5_v = ((vec_double2){x13, x14}); + vec_llong2 r50_v = ((vec_llong2){0,0}); + vec_llong2 r51_v = ((vec_llong2){0,0}); + vec_llong2 r52_v = ((vec_llong2){1,0}); + vec_llong2 r53_v = ((vec_llong2){0,-1}); + vec_double2 x6_v = ((vec_double2){x15, x16}); 
+ vec_llong2 r60_v = ((vec_llong2){1,0}); + vec_llong2 r61_v = ((vec_llong2){0,0}); + vec_llong2 r62_v = ((vec_llong2){1,0}); + vec_llong2 r63_v = ((vec_llong2){0,-1}); + + vec_double2 x7_v = ((vec_double2){x17, x18}); + vec_llong2 r70_v = ((vec_llong2){1,-4503599627370496LL}); + vec_llong2 r71_v = ((vec_llong2){0,-4503599627370495LL}); + vec_llong2 r72_v = ((vec_llong2){1,-4503599627370495LL}); + vec_llong2 r73_v = ((vec_llong2){0,-4503599627370496LL}); + + vec_llong2 res_v; + + TEST_START("llrintd2"); + + spu_mtfpscr(((vec_uint4){0x0000,0,0,0})); //change FP mode + res_v = llrintd2 (x0_v); + TEST_CHECK("20060921101001NM", allequal_llong2( res_v, r0_v ), 0); + res_v = llrintd2 (x1_v); + TEST_CHECK("20060921101002NM", allequal_llong2( res_v, r10_v ), 0); + res_v = llrintd2 (x2_v); + TEST_CHECK("20060921101003NM", allequal_llong2( res_v, r20_v ), 0); + res_v = llrintd2 (x3_v); + TEST_CHECK("20060921101004NM", allequal_llong2( res_v, r30_v ), 0); + res_v = llrintd2 (x4_v); + TEST_CHECK("20060921101005NM", allequal_llong2( res_v, r40_v ), 0); + res_v = llrintd2 (x5_v); + TEST_CHECK("20060921101006NM", allequal_llong2( res_v, r50_v ), 0); + res_v = llrintd2 (x6_v); + TEST_CHECK("20060921101007NM", allequal_llong2( res_v, r60_v ), 0); + res_v = llrintd2 (x7_v); + TEST_CHECK("20060921101017NM", allequal_llong2( res_v, r70_v ), 0); + + spu_mtfpscr(((vec_uint4){0x0500,0,0,0})); //change FP mode + res_v = llrintd2 (x0_v); + TEST_CHECK("20060921101008NM", allequal_llong2( res_v, r0_v ), 0); + res_v = llrintd2 (x1_v); + TEST_CHECK("20060921101009NM", allequal_llong2( res_v, r11_v ), 0); + res_v = llrintd2 (x2_v); + TEST_CHECK("20060921101010NM", allequal_llong2( res_v, r21_v ), 0); + res_v = llrintd2 (x3_v); + TEST_CHECK("20060921101011NM", allequal_llong2( res_v, r31_v ), 0); + res_v = llrintd2 (x4_v); + TEST_CHECK("20060921101012NM", allequal_llong2( res_v, r41_v ), 0); + res_v = llrintd2 (x5_v); + TEST_CHECK("20060921101013NM", allequal_llong2( res_v, r51_v ), 0); + res_v 
= llrintd2 (x6_v); + TEST_CHECK("20060921101014NM", allequal_llong2( res_v, r61_v ), 0); + res_v = llrintd2 (x7_v); + TEST_CHECK("20060921101027NM", allequal_llong2( res_v, r71_v ), 0); + + spu_mtfpscr(((vec_uint4){0x0a00,0,0,0})); //change FP mode + res_v = llrintd2 (x0_v); + TEST_CHECK("20060921101015NM", allequal_llong2( res_v, r0_v ), 0); + res_v = llrintd2 (x1_v); + TEST_CHECK("20060921101016NM", allequal_llong2( res_v, r12_v ), 0); + res_v = llrintd2 (x2_v); + TEST_CHECK("20060921101017NM", allequal_llong2( res_v, r22_v ), 0); + res_v = llrintd2 (x3_v); + TEST_CHECK("20060921101018NM", allequal_llong2( res_v, r32_v ), 0); + res_v = llrintd2 (x4_v); + TEST_CHECK("20060921101019NM", allequal_llong2( res_v, r42_v ), 0); + res_v = llrintd2 (x5_v); + TEST_CHECK("20060921101020NM", allequal_llong2( res_v, r52_v ), 0); + res_v = llrintd2 (x6_v); + TEST_CHECK("20060921101021NM", allequal_llong2( res_v, r62_v ), 0); + res_v = llrintd2 (x7_v); + TEST_CHECK("20060921101037NM", allequal_llong2( res_v, r72_v ), 0); + + spu_mtfpscr(((vec_uint4){0x0f00,0,0,0})); //change FP mode + res_v = llrintd2 (x0_v); + TEST_CHECK("20060921101022NM", allequal_llong2( res_v, r0_v ), 0); + res_v = llrintd2 (x1_v); + TEST_CHECK("20060921101023NM", allequal_llong2( res_v, r13_v ), 0); + res_v = llrintd2 (x2_v); + TEST_CHECK("20060921101024NM", allequal_llong2( res_v, r23_v ), 0); + res_v = llrintd2 (x3_v); + TEST_CHECK("20060921101025NM", allequal_llong2( res_v, r33_v ), 0); + res_v = llrintd2 (x4_v); + TEST_CHECK("20060921101026NM", allequal_llong2( res_v, r43_v ), 0); + res_v = llrintd2 (x5_v); + TEST_CHECK("20060921101027NM", allequal_llong2( res_v, r53_v ), 0); + res_v = llrintd2 (x6_v); + TEST_CHECK("20060921101028NM", allequal_llong2( res_v, r63_v ), 0); + res_v = llrintd2 (x7_v); + TEST_CHECK("20060921101047NM", allequal_llong2( res_v, r73_v ), 0); + + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/llrintf4.c 
b/Extras/simdmathlibrary/spu/tests/llrintf4.c new file mode 100644 index 000000000..e165a6524 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/llrintf4.c @@ -0,0 +1,124 @@ +/* Test llrintf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ llrintf4 - rounds four floats in to four nearest 64bit integer. + * + *@brief + * boundary test for llrintf4. On SPU the rounding mode for floats is always towards 0. 
+ * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + **/ + + + +#include +#include +#include +//#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +static inline llroundf4_t set_llrintf4_t(long long int in0, long long int in1, long long int in2, long long int in3) +{ + llroundf4_t res; + + res.vll[0] = ((vec_llong2){in0,in1}); + res.vll[1] = ((vec_llong2){in2,in3}); + + return res; +} +int main() +{ + TEST_SET_START("20060918101000NM","NM", "llrintf4"); + +// unsigned long i11 = 0x5efffffful; // 9223371487098961920 +// unsigned long i12 = 0xdefffffful; //-9223371487098961920 +// unsigned long i11 = 0x49fffffful; //2097151.875000 +// unsigned long i12 = 0x4a7ffffful; //4194303.750000 + +// float x0 = hide_float(-1.0/0.0); // -Inf +// float x1 = hide_float(1.0/0.0); // Inf + float x2 = hide_float(0.0); // +0 + float x3 = hide_float(-0.0); // -0 +// float x4 = hide_float(0.0/0.0); // NaN -> NaN + float x5 = hide_float( 0.5); + float x6 = hide_float(-0.5); + float x7 = hide_float(-0.499999); + float x8 = hide_float( 0.499999); + float x9 = hide_float(-999999.5); + float x10 = hide_float( 999999.5); +// float x11 = hide_float(make_float(i11)); +// float x12 = hide_float(make_float(i12)); + float x11 = hide_float( 9223371487098961920.); + float x12 = hide_float(-9223371487098961920.); + float x13 = (0.0 - FLT_MIN); + float x14 = FLT_MIN; + float x15 = hide_float(-2097151.875000); + float x16 = hide_float(-4194303.750000); + float x17 = hide_float( 4194303.750000); + float x18 = hide_float( 2097151.875000); + + vec_float4 x0_v = ((vec_float4){ x2, x3, x5, x6}); + llroundf4_t r0_v = set_llrintf4_t(0, 0, 0, 0); + + vec_float4 x1_v = ((vec_float4){ x7, x8, x9, x10}); + llroundf4_t r1_v = set_llrintf4_t(0, 0, -999999, 999999); + + vec_float4 x2_v = ((vec_float4){ x11, x12, x13, x14}); + llroundf4_t r2_v = set_llrintf4_t(9223371487098961920ll, -9223371487098961920ll, 0, 0); + + 
vec_float4 x3_v = ((vec_float4){ x15, x16, x17, x18}); + llroundf4_t r3_v = set_llrintf4_t(-2097151, -4194303, 4194303, 2097151); + + llroundf4_t res_v; + + TEST_START("llrintf4"); + res_v = llrintf4 (x0_v); + TEST_CHECK("20060918101001NM", allequal_llroundf4( res_v, r0_v ), 0); + res_v = llrintf4 (x1_v); + TEST_CHECK("20060918101002NM", allequal_llroundf4( res_v, r1_v ), 0); + res_v = llrintf4 (x2_v); + TEST_CHECK("20060918101003NM", allequal_llroundf4( res_v, r2_v ), 0); + res_v = llrintf4 (x3_v); + TEST_CHECK("20060918101004NM", allequal_llroundf4( res_v, r3_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/llroundd2.c b/Extras/simdmathlibrary/spu/tests/llroundd2.c new file mode 100644 index 000000000..e982dc199 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/llroundd2.c @@ -0,0 +1,114 @@ +/* Test llroundd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ llroundd2 - rounds two doubles in to two nearest 64bit integer. + * + *@brief + * boundary test for llroundd2. 0.5 will be rounded to far from 0. + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + **/ + + +#include +#include +#include +//#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20060916101000NM","NM", "llroundd2"); + +// unsigned long long i11 = 0x7FFFFFFFFFFFFDFFull; //limit +// unsigned long long i12 = 0xFFFFFFFFFFFFFDFFull; //limit + +// double x0 = hide_double(-1.0/0.0); // -Inf +// double x1 = hide_double(1.0/0.0); // Inf + double x2 = hide_double(0.0); // +0 + double x3 = hide_double(-0.0); // -0 +// double x4 = hide_double(0.0/0.0); // NaN -> NaN + double x5 = hide_double( 0.5); + double x6 = hide_double(-0.5); + double x7 = hide_double( 0.4999999999999999); // 0 + double x8 = hide_double(-0.4999999999999999); // 0 + double x9 = hide_double( 999999999999999.5); // 1000000000000000 + double x10 = hide_double(-999999999999999.5); //-1000000000000000 +// double x11 = hide_double(make_double(i11)); // 9223372036854774784 +// double x12 = hide_double(make_double(i12)); //-9223372036854774784 + double x11 = hide_double(9223372036854774784ll); // 9223372036854774784 + double x12 = hide_double(-9223372036854774784ll); //-9223372036854774784 + double x13 = DBL_MIN; + double x14 = (0.0 - 
DBL_MIN); + + vec_double2 x0_v = ((vec_double2){x2, x3}); //+0,-0 + vec_llong2 r0_v = ((vec_llong2){0,0}); // 0, 0 + vec_double2 x1_v = ((vec_double2){x5, x8}); //+0.5,-0.4999999999999999 + vec_llong2 r1_v = ((vec_llong2){1,0}); // 1, 0 + vec_double2 x2_v = ((vec_double2){x7, x6}); //+0.4999999999999999, -0.5 + vec_llong2 r2_v = ((vec_llong2){0,-1}); // 0, -1 + vec_double2 x3_v = ((vec_double2){x10, x11}); //-999999999999999.5, 9223372036854774784 + vec_llong2 r3_v = ((vec_llong2){-1000000000000000ll,9223372036854774784ll}); + vec_double2 x4_v = ((vec_double2){x12, x9}); //-9223372036854774784, 999999999999999.5 + vec_llong2 r4_v = ((vec_llong2){-9223372036854774784ll,1000000000000000ll}); + vec_double2 x5_v = ((vec_double2){x13, x14}); + vec_llong2 r5_v = ((vec_llong2){0,0}); + + vec_llong2 res_v; + + TEST_START("llroundd2"); + res_v = llroundd2 (x0_v); + TEST_CHECK("20060916101001NM", allequal_llong2( res_v, r0_v ), 0); + res_v = llroundd2 (x1_v); + TEST_CHECK("20060916101002NM", allequal_llong2( res_v, r1_v ), 0); + res_v = llroundd2 (x2_v); + TEST_CHECK("20060916101003NM", allequal_llong2( res_v, r2_v ), 0); + res_v = llroundd2 (x3_v); + TEST_CHECK("20060916101004NM", allequal_llong2( res_v, r3_v ), 0); + res_v = llroundd2 (x4_v); + TEST_CHECK("20060916101005NM", allequal_llong2( res_v, r4_v ), 0); + res_v = llroundd2 (x5_v); + TEST_CHECK("20060916101006NM", allequal_llong2( res_v, r5_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/llroundf4.c b/Extras/simdmathlibrary/spu/tests/llroundf4.c new file mode 100644 index 000000000..7b1cf6643 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/llroundf4.c @@ -0,0 +1,126 @@ +/* Test llroundf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ llroundf4 - rounds four floats in to four nearest 64bit integer. + * + *@brief + * boundary test for llroundf4. 0.5 will be rounded to far from 0. + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. 
+ * + *@note + * when comparing with return of ppu scalar math library + * answer of 0x??7fffff and 0x??ffffff was something strange + * + **/ + + + +#include +#include +#include +//#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +static inline llroundf4_t set_llroundf4_t(long long int in0, long long int in1, long long int in2, long long int in3) +{ + llroundf4_t res; + + res.vll[0] = ((vec_llong2){in0,in1}); + res.vll[1] = ((vec_llong2){in2,in3}); + + return res; +} +int main() +{ + TEST_SET_START("20060917101000NM","NM", "llroundf4"); + +// unsigned long i11 = 0x5efffffful; // 9223371487098961920 +// unsigned long i12 = 0xdefffffful; //-9223371487098961920 +// unsigned long i11 = 0x49fffffful; //2097151.875000 +// unsigned long i12 = 0x4a7ffffful; //4194303.750000 + +// float x0 = hide_float(-1.0/0.0); // -Inf +// float x1 = hide_float(1.0/0.0); // Inf + float x2 = hide_float(0.0); // +0 + float x3 = hide_float(-0.0); // -0 +// float x4 = hide_float(0.0/0.0); // NaN -> NaN + float x5 = hide_float( 0.5); + float x6 = hide_float(-0.5); + float x7 = hide_float(-0.499999); + float x8 = hide_float( 0.499999); + float x9 = hide_float(-999999.5); + float x10 = hide_float( 999999.5); +// float x11 = hide_float(make_float(i11)); +// float x12 = hide_float(make_float(i12)); + float x11 = hide_float( 9223371487098961920.); + float x12 = hide_float(-9223371487098961920.); + float x13 = (0.0 - FLT_MIN); + float x14 = FLT_MIN; + float x15 = hide_float(-2097151.875000); + float x16 = hide_float(-4194303.750000); + float x17 = hide_float( 4194303.750000); + float x18 = hide_float( 2097151.875000); + + vec_float4 x0_v = ((vec_float4){ x2, x3, x5, x6}); + llroundf4_t r0_v = set_llroundf4_t(0, 0, 1, -1); + + vec_float4 x1_v = ((vec_float4){ x7, x8, x9, x10}); + llroundf4_t r1_v = set_llroundf4_t(0, 0, -1000000, 1000000); + + vec_float4 x2_v = ((vec_float4){ x11, x12, x13, x14}); + llroundf4_t r2_v = 
set_llroundf4_t(9223371487098961920ll, -9223371487098961920ll, 0, 0); + + vec_float4 x3_v = ((vec_float4){ x15, x16, x17, x18}); + llroundf4_t r3_v = set_llroundf4_t(-2097152, -4194304, 4194304, 2097152); + + llroundf4_t res_v; + + TEST_START("llroundf4"); + res_v = llroundf4 (x0_v); + TEST_CHECK("20060916101001NM", allequal_llroundf4( res_v, r0_v ), 0); + res_v = llroundf4 (x1_v); + TEST_CHECK("20060916101002NM", allequal_llroundf4( res_v, r1_v ), 0); + res_v = llroundf4 (x2_v); + TEST_CHECK("20060916101003NM", allequal_llroundf4( res_v, r2_v ), 0); + res_v = llroundf4 (x3_v); + TEST_CHECK("20060916101004NM", allequal_llroundf4( res_v, r3_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/logbd2.c b/Extras/simdmathlibrary/spu/tests/logbd2.c new file mode 100644 index 000000000..fcb33f704 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/logbd2.c @@ -0,0 +1,326 @@ +/* Test logbd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#ifndef HUGE_VALL +#define HUGE_VALL __builtin_huge_vall () +#endif + +#ifndef DBL_INF +#define DBL_INF ((long long)0x7FF0000000000000ull) +#endif + +#ifndef DBL_NAN +#define DBL_NAN ((long long)0x7FF8000000000000ull) +#endif + +int main() +{ + TEST_SET_START("20060905000000AAN","AAN", "logbd2"); + + // -Nan + double x0 = hide_double(-nan("")); + double r0 = hide_double( nan("")); + + // -Inf + double x1 = hide_double(-1.0/0.0); + double r1 = hide_double(make_double(DBL_INF)); + + // -Dmax + double x2 = hide_double(-DBL_MAX); + double r2 = 1023.0; + + // -Norm + double x3 = hide_double(-824842.58421394); + double r3 = 19.0; + + // -Dmin + double x4 = hide_double(-DBL_MIN); + double r4 = -1022.0; + + // -Denorm + double x5 = hide_double(-2.40e-310); + double r5 = -1029.0; + + // -Unf + double x6 = hide_double(-1.0e-999); + double r6 = make_double(-((unsigned long long)HUGE_VALL)); + + // -0 + double x7 = hide_double(-0.0); + double r7 = make_double(-((unsigned long long)HUGE_VALL)); + + // 0 + double x8 = hide_double( 0.0); + double r8 = make_double(-((unsigned long long)HUGE_VALL)); + + // +Unf + double x9 = hide_double( 1.0e-999); + double r9 = make_double(-((unsigned long long)HUGE_VALL)); + + // +Denorm + double x10 = hide_double( 2.40e-310); + double r10 = -1029.0; + + // +Dmin + double x11 = hide_double( DBL_MIN); + 
double r11 = -1022.0; + + // +Norm + double x12 = hide_double(3.14152634); + double r12 = 1.0; + + // +Norm + double x13 = hide_double(7.0673903e149); + double r13 = 497.0; + + // +Norm + double x14 = hide_double(2.4673e304); + double r14 = 1011.0; + + // +Norm + double x15 = hide_double(7.235672e-25); + double r15 = -81.0; + + // +Denorm + double x16 = hide_double(9.452854e-312); + double r16 = -1034.0; + + // +Demorm + double x17 = hide_double(3.045784e-320); + double r17 = -1062.0; + + // -Norm + double x18 = hide_double(-6.459273e7); + double r18 = 25.0; + + // -Norm + double x19 = hide_double(-2.493472e-99); + double r19 = -328.0; + + // -Norm + double x20 = hide_double(-1.4824543e128); + double r20 = 425.0; + + // -Denorm + double x21 = hide_double(-5.53856231e-315); + double r21 = -1044.0; + + // -Demorm + double x22 = hide_double(-2.5684367e-312); + double r22 = -1036.0; + + // +Dmax + double x23 = hide_double(DBL_MAX); + double r23 = 1023.0; + + // +Inf + double x24 = hide_double( 1.0/0.0); + double r24 = hide_double(make_double(DBL_INF)); + + //+Nan + double x25 = hide_double( nan("")); + double r25 = hide_double( nan("")); + + // Compound + vec_double2 x26_v = (vec_double2) { -2.561286432e-317, -1.0e-999 }; + vec_double2 r26_v = (vec_double2) { -1052.0, make_double(-((unsigned long long)HUGE_VALL)) }; + + // Compound + vec_double2 x27_v = (vec_double2) { 345.27533, -8.673e-310 }; + vec_double2 r27_v = (vec_double2) { 8.0, -1027.0 }; + + // Compound + vec_double2 x28_v = (vec_double2) { nan(""), -3678342.8765343 }; + vec_double2 r28_v = (vec_double2) { nan(""), 21.0 }; + + // Compound + vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 r29_v = (vec_double2) { make_double(DBL_INF), nan("") }; + + // Compound + vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ; + vec_double2 r30_v = (vec_double2) { -329.0, make_double(DBL_INF) }; + + vec_double2 x0_v = spu_splats(x0); + vec_double2 r0_v = spu_splats(r0); + + vec_double2 x1_v = 
spu_splats(x1); + vec_double2 r1_v = spu_splats(r1); + + vec_double2 x2_v = spu_splats(x2); + vec_double2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_double2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_double2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_double2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_double2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_double2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_double2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_double2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_double2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_double2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_double2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_double2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_double2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_double2 r15_v = spu_splats(r15); + + vec_double2 x16_v = spu_splats(x16); + vec_double2 r16_v = spu_splats(r16); + + vec_double2 x17_v = spu_splats(x17); + vec_double2 r17_v = spu_splats(r17); + + vec_double2 x18_v = spu_splats(x18); + vec_double2 r18_v = spu_splats(r18); + + vec_double2 x19_v = spu_splats(x19); + vec_double2 r19_v = spu_splats(r19); + + vec_double2 x20_v = spu_splats(x20); + vec_double2 r20_v = spu_splats(r20); + + vec_double2 x21_v = spu_splats(x21); + vec_double2 r21_v = spu_splats(r21); + + vec_double2 x22_v = spu_splats(x22); + vec_double2 r22_v = spu_splats(r22); + + vec_double2 x23_v = spu_splats(x23); + vec_double2 r23_v = spu_splats(r23); + + vec_double2 x24_v = spu_splats(x24); + vec_double2 r24_v = spu_splats(r24); + + vec_double2 x25_v = spu_splats(x25); + vec_double2 r25_v = spu_splats(r25); + + vec_double2 res_v; + + TEST_START("logbd2"); + + res_v 
= (vec_double2)logbd2(x0_v); + TEST_CHECK("20060905000000AAN", allnan_double2( res_v ), 0); (void)r0_v; + res_v = (vec_double2)logbd2(x1_v); + TEST_CHECK("20060905000001AAN", allequal_double2( res_v, r1_v ), 0); + res_v = (vec_double2)logbd2(x2_v); + TEST_CHECK("20060905000002AAN", allequal_double2( res_v, r2_v ), 0); + res_v = (vec_double2)logbd2(x3_v); + TEST_CHECK("20060905000003AAN", allequal_double2( res_v, r3_v ), 0); + res_v = (vec_double2)logbd2(x4_v); + TEST_CHECK("20060905000004AAN", allequal_double2( res_v, r4_v ), 0); + res_v = (vec_double2)logbd2(x5_v); + TEST_CHECK("20060905000005AAN", allequal_double2( res_v, r5_v ), 0); + res_v = (vec_double2)logbd2(x6_v); + TEST_CHECK("20060905000006AAN", allequal_double2( res_v, r6_v ), 0); + res_v = (vec_double2)logbd2(x7_v); + TEST_CHECK("20060905000007AAN", allequal_double2( res_v, r7_v ), 0); + res_v = (vec_double2)logbd2(x8_v); + TEST_CHECK("20060905000008AAN", allequal_double2( res_v, r8_v ), 0); + res_v = (vec_double2)logbd2(x9_v); + TEST_CHECK("20060905000009AAN", allequal_double2( res_v, r9_v ), 0); + res_v = (vec_double2)logbd2(x10_v); + TEST_CHECK("20060905000010AAN", allequal_double2( res_v, r10_v ), 0); + res_v = (vec_double2)logbd2(x11_v); + TEST_CHECK("20060905000011AAN", allequal_double2( res_v, r11_v ), 0); + res_v = (vec_double2)logbd2(x12_v); + TEST_CHECK("20060905000012AAN", allequal_double2( res_v, r12_v ), 0); + res_v = (vec_double2)logbd2(x13_v); + TEST_CHECK("20060905000013AAN", allequal_double2( res_v, r13_v ), 0); + res_v = (vec_double2)logbd2(x14_v); + TEST_CHECK("20060905000014AAN", allequal_double2( res_v, r14_v ), 0); + res_v = (vec_double2)logbd2(x15_v); + TEST_CHECK("20060905000015AAN", allequal_double2( res_v, r15_v ), 0); + res_v = (vec_double2)logbd2(x16_v); + TEST_CHECK("20060905000016AAN", allequal_double2( res_v, r16_v ), 0); + res_v = (vec_double2)logbd2(x17_v); + TEST_CHECK("20060905000017AAN", allequal_double2( res_v, r17_v ), 0); + res_v = (vec_double2)logbd2(x18_v); + 
TEST_CHECK("20060905000018AAN", allequal_double2( res_v, r18_v ), 0); + res_v = (vec_double2)logbd2(x19_v); + TEST_CHECK("20060905000019AAN", allequal_double2( res_v, r19_v ), 0); + res_v = (vec_double2)logbd2(x20_v); + TEST_CHECK("20060905000020AAN", allequal_double2( res_v, r20_v ), 0); + res_v = (vec_double2)logbd2(x21_v); + TEST_CHECK("20060905000021AAN", allequal_double2( res_v, r21_v ), 0); + res_v = (vec_double2)logbd2(x22_v); + TEST_CHECK("20060905000022AAN", allequal_double2( res_v, r22_v ), 0); + res_v = (vec_double2)logbd2(x23_v); + TEST_CHECK("20060905000023AAN", allequal_double2( res_v, r23_v ), 0); + res_v = (vec_double2)logbd2(x24_v); + TEST_CHECK("20060905000024AAN", allequal_double2( res_v, r24_v ), 0); + res_v = (vec_double2)logbd2(x25_v); + TEST_CHECK("20060905000025AAN", allnan_double2( res_v ), 0); (void)r25_v; + res_v = (vec_double2)logbd2(x26_v); + TEST_CHECK("20060905000026AAN", allequal_double2( res_v, r26_v ), 0); + res_v = (vec_double2)logbd2(x27_v); + TEST_CHECK("20060905000027AAN", allequal_double2( res_v, r27_v ), 0); + res_v = (vec_double2)logbd2(x28_v); + TEST_CHECK("20060905000028AAN", allequal_ulps_double2( res_v, r28_v, 0 ), 0); + res_v = (vec_double2)logbd2(x29_v); + TEST_CHECK("20060905000029AAN", allequal_ulps_double2( res_v, r29_v, 0 ), 0); + res_v = (vec_double2)logbd2(x30_v); + TEST_CHECK("20060905000030AAN", allequal_double2( res_v, r30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/logbf4.c b/Extras/simdmathlibrary/spu/tests/logbf4.c new file mode 100644 index 000000000..43b0ae873 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/logbf4.c @@ -0,0 +1,320 @@ +/* Test logbf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#ifndef HUGE_VALF +#define HUGE_VALF __builtin_huge_valf () +#endif + +int main() +{ + TEST_SET_START("20060905000000AAN","AAN", "logbf4"); + + // -Norm (IEEE-754: -Nan) + float x0 = hide_float(make_float(0xFFC00000)); + float r0 = 128.0f; + + // -Norm (IEEE-754: -Inf) + float x1 = hide_float(-1.0/0.0); + float r1 = 128.0f; + + // -Smax + float x2 = hide_float(-FLT_MAX); + float r2 = 128.0f; + + // -Norm + float x3 = hide_float(-824842.58421394); + float r3 = 19.0f; + + // -Smin + float x4 = hide_float(make_float(0x80800000)); + float r4 = -126.0f; + + // -Denorm + float x5 = hide_float(make_float(0x807AAAAA)); + float r5 = (float)-HUGE_VALF; + + // -Unf + float x6 = hide_float(-1.0e-999); + float r6 = (float)-HUGE_VALF; + + // -0 + float x7 = hide_float(-0.0); + float r7 = (float)-HUGE_VALF; + + // 0 + float x8 = hide_float( 0.0); + float r8 = (float)-HUGE_VALF; + + // +Unf + float x9 = hide_float( 1.0e-999); + float r9 = (float)-HUGE_VALF; + + // +Denorm + float x10 = hide_float(make_float(0x007AAAAA)); + float r10 = (float)-HUGE_VALF; + + // +Smin + float x11 = hide_float(make_float(0x00800000)); + float r11 = -126.0f; + + // +Norm + float x12 = hide_float(3.14152634); + float r12 = 1.0f; + + // +Norm + float x13 = hide_float(7.0673903e37); + float r13 = 125.0f; + + // +Norm + float x14 = hide_float(2.4673e14); + float r14 = 47.0f; + + // +Norm + float x15 = hide_float(7.235672e-25); + float r15 = -81.0f; + + // +Norm + float x16 = hide_float(9.452854e17); + float r16 = 59.0f; + + // +Norm + float x17 = hide_float(3.045784e-18); + float r17 = -59.0f; + + // -Norm + float x18 = hide_float(-6.459273e7); + float r18 = 25.0f; + + // -Norm + float x19 = hide_float(-2.493472e-9); + float r19 = -29.0f; + + // -Norm + float x20 = hide_float(-1.4824543e28); + float r20 = 93.0f; + + // -Norm + float x21 = hide_float(-5.53856231e-27); + float r21 = 
-88.0f; + + // -Norm + float x22 = hide_float(-1.000001); + float r22 = 0.0f; + + // +Smax + float x23 = hide_float(FLT_MAX); + float r23 = 128.0f; + + //+Norm (IEEE-754: +Inf) + float x24 = hide_float( 1.0/0.0); + float r24 = 128.0f; + + //+Norm (IEEE-754: +Nan) + float x25 = hide_float(make_float(0x7FC00000)); + float r25 = 128.0f; + + // Compound + vec_float4 x26_v = (vec_float4) { -2.561286432e10, FLT_MAX, -1.0e-999, 7.235672e-25 }; + vec_float4 r26_v = (vec_float4) { 34.0f, 128.0f, -HUGE_VALF, -81.0f }; + + // Compound + vec_float4 x27_v = (vec_float4) { 345.27533f, 7.0673903e37, -0.0f, -2.40e-310 }; + vec_float4 r27_v = (vec_float4) { 8.0f, 125.0f, -HUGE_VALF, -HUGE_VALF }; + + // Compound + vec_float4 x28_v = (vec_float4) { make_float(0x7FC00000), -824842.58421394f, -0.0f, -3678342.8765343f }; + vec_float4 r28_v = (vec_float4) { 128.0f, 19.0f, -HUGE_VALF, 21.0f }; + + // Compound + vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; + vec_float4 r29_v = (vec_float4) { 128.0f, -HUGE_VALF, -88.0f, 128.0f }; + + // Compound + vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ; + vec_float4 r30_v = (vec_float4) { -HUGE_VALF, -63.0f, -59.0f, 128.0f }; + + vec_float4 x0_v = spu_splats(x0); + vec_float4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_float4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_float4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_float4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_float4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_float4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_float4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_float4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_float4 r9_v = spu_splats(r9); + + vec_float4 
x10_v = spu_splats(x10); + vec_float4 r10_v = spu_splats(r10); + + vec_float4 x11_v = spu_splats(x11); + vec_float4 r11_v = spu_splats(r11); + + vec_float4 x12_v = spu_splats(x12); + vec_float4 r12_v = spu_splats(r12); + + vec_float4 x13_v = spu_splats(x13); + vec_float4 r13_v = spu_splats(r13); + + vec_float4 x14_v = spu_splats(x14); + vec_float4 r14_v = spu_splats(r14); + + vec_float4 x15_v = spu_splats(x15); + vec_float4 r15_v = spu_splats(r15); + + vec_float4 x16_v = spu_splats(x16); + vec_float4 r16_v = spu_splats(r16); + + vec_float4 x17_v = spu_splats(x17); + vec_float4 r17_v = spu_splats(r17); + + vec_float4 x18_v = spu_splats(x18); + vec_float4 r18_v = spu_splats(r18); + + vec_float4 x19_v = spu_splats(x19); + vec_float4 r19_v = spu_splats(r19); + + vec_float4 x20_v = spu_splats(x20); + vec_float4 r20_v = spu_splats(r20); + + vec_float4 x21_v = spu_splats(x21); + vec_float4 r21_v = spu_splats(r21); + + vec_float4 x22_v = spu_splats(x22); + vec_float4 r22_v = spu_splats(r22); + + vec_float4 x23_v = spu_splats(x23); + vec_float4 r23_v = spu_splats(r23); + + vec_float4 x24_v = spu_splats(x24); + vec_float4 r24_v = spu_splats(r24); + + vec_float4 x25_v = spu_splats(x25); + vec_float4 r25_v = spu_splats(r25); + + vec_float4 res_v; + + TEST_START("logbf4"); + + res_v = (vec_float4)logbf4(x0_v); + TEST_CHECK("20060905000000AAN", allequal_float4( res_v, r0_v ), 0); + res_v = (vec_float4)logbf4(x1_v); + TEST_CHECK("20060905000001AAN", allequal_float4( res_v, r1_v ), 0); + res_v = (vec_float4)logbf4(x2_v); + TEST_CHECK("20060905000002AAN", allequal_float4( res_v, r2_v ), 0); + res_v = (vec_float4)logbf4(x3_v); + TEST_CHECK("20060905000003AAN", allequal_float4( res_v, r3_v ), 0); + res_v = (vec_float4)logbf4(x4_v); + TEST_CHECK("20060905000004AAN", allequal_float4( res_v, r4_v ), 0); + res_v = (vec_float4)logbf4(x5_v); + TEST_CHECK("20060905000005AAN", allequal_float4( res_v, r5_v ), 0); + res_v = (vec_float4)logbf4(x6_v); + TEST_CHECK("20060905000006AAN", 
allequal_float4( res_v, r6_v ), 0); + res_v = (vec_float4)logbf4(x7_v); + TEST_CHECK("20060905000007AAN", allequal_float4( res_v, r7_v ), 0); + res_v = (vec_float4)logbf4(x8_v); + TEST_CHECK("20060905000008AAN", allequal_float4( res_v, r8_v ), 0); + res_v = (vec_float4)logbf4(x9_v); + TEST_CHECK("20060905000009AAN", allequal_float4( res_v, r9_v ), 0); + res_v = (vec_float4)logbf4(x10_v); + TEST_CHECK("20060905000010AAN", allequal_float4( res_v, r10_v ), 0); + res_v = (vec_float4)logbf4(x11_v); + TEST_CHECK("20060905000011AAN", allequal_float4( res_v, r11_v ), 0); + res_v = (vec_float4)logbf4(x12_v); + TEST_CHECK("20060905000012AAN", allequal_float4( res_v, r12_v ), 0); + res_v = (vec_float4)logbf4(x13_v); + TEST_CHECK("20060905000013AAN", allequal_float4( res_v, r13_v ), 0); + res_v = (vec_float4)logbf4(x14_v); + TEST_CHECK("20060905000014AAN", allequal_float4( res_v, r14_v ), 0); + res_v = (vec_float4)logbf4(x15_v); + TEST_CHECK("20060905000015AAN", allequal_float4( res_v, r15_v ), 0); + res_v = (vec_float4)logbf4(x16_v); + TEST_CHECK("20060905000016AAN", allequal_float4( res_v, r16_v ), 0); + res_v = (vec_float4)logbf4(x17_v); + TEST_CHECK("20060905000017AAN", allequal_float4( res_v, r17_v ), 0); + res_v = (vec_float4)logbf4(x18_v); + TEST_CHECK("20060905000018AAN", allequal_float4( res_v, r18_v ), 0); + res_v = (vec_float4)logbf4(x19_v); + TEST_CHECK("20060905000019AAN", allequal_float4( res_v, r19_v ), 0); + res_v = (vec_float4)logbf4(x20_v); + TEST_CHECK("20060905000020AAN", allequal_float4( res_v, r20_v ), 0); + res_v = (vec_float4)logbf4(x21_v); + TEST_CHECK("20060905000021AAN", allequal_float4( res_v, r21_v ), 0); + res_v = (vec_float4)logbf4(x22_v); + TEST_CHECK("20060905000022AAN", allequal_float4( res_v, r22_v ), 0); + res_v = (vec_float4)logbf4(x23_v); + TEST_CHECK("20060905000023AAN", allequal_float4( res_v, r23_v ), 0); + res_v = (vec_float4)logbf4(x24_v); + TEST_CHECK("20060905000024AAN", allequal_float4( res_v, r24_v ), 0); + res_v = 
(vec_float4)logbf4(x25_v); + TEST_CHECK("20060905000025AAN", allequal_float4( res_v, r25_v ), 0); + res_v = (vec_float4)logbf4(x26_v); + TEST_CHECK("20060905000026AAN", allequal_float4( res_v, r26_v ), 0); + res_v = (vec_float4)logbf4(x27_v); + TEST_CHECK("20060905000027AAN", allequal_float4( res_v, r27_v ), 0); + res_v = (vec_float4)logbf4(x28_v); + TEST_CHECK("20060905000028AAN", allequal_float4( res_v, r28_v ), 0); + res_v = (vec_float4)logbf4(x29_v); + TEST_CHECK("20060905000029AAN", allequal_float4( res_v, r29_v ), 0); + res_v = (vec_float4)logbf4(x30_v); + TEST_CHECK("20060905000030AAN", allequal_float4( res_v, r30_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/modfd2.c b/Extras/simdmathlibrary/spu/tests/modfd2.c new file mode 100644 index 000000000..88ee903fe --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/modfd2.c @@ -0,0 +1,109 @@ +/* Test modfd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060901173000MH", "MH", "modfd2"); + + unsigned long long i3 = 0x432fffffffffffffull; // 2^52 - 0.5, largest truncatable value. + unsigned long long i3i = 0x432ffffffffffffeull; + unsigned long long i4 = 0x4330000000000000ull; // 2^53, no fractional part. + unsigned long long i5 = 0xcff0000000000001ull; // one more large, and negative, value. 
+ + double x0 = hide_double(0.91825); + double x0i = hide_double(0.0); + double x0f = hide_double(0.91825); + + double x1 = hide_double(-0.12958); + double x1i = hide_double(0.0); + double x1f = hide_double(-0.12958); + + double x2 = hide_double(-79615.1875); + double x2i = hide_double(-79615.0); + double x2f = hide_double(-0.1875); + + double x3 = hide_double(make_double(i3)); + double x3i = hide_double(make_double(i3i)); + double x3f = hide_double(0.5); + + double x4 = hide_double(make_double(i4)); + double x4i = hide_double(make_double(i4)); + double x4f = hide_double(0.0); + + double x5 = hide_double(make_double(i5)); + double x5i = hide_double(make_double(i5)); + double x5f = hide_double(0.0); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 x0i_v = spu_splats(x0i); + vec_double2 x0f_v = spu_splats(x0f); + vec_double2 x1_v = spu_splats(x1); + vec_double2 x1i_v = spu_splats(x1i); + vec_double2 x1f_v = spu_splats(x1f); + vec_double2 x2_v = spu_splats(x2); + vec_double2 x2i_v = spu_splats(x2i); + vec_double2 x2f_v = spu_splats(x2f); + vec_double2 x3_v = spu_splats(x3); + vec_double2 x3i_v = spu_splats(x3i); + vec_double2 x3f_v = spu_splats(x3f); + vec_double2 x4_v = spu_splats(x4); + vec_double2 x4i_v = spu_splats(x4i); + vec_double2 x4f_v = spu_splats(x4f); + vec_double2 x5_v = spu_splats(x5); + vec_double2 x5i_v = spu_splats(x5i); + vec_double2 x5f_v = spu_splats(x5f); + + vec_double2 integer_v, fraction_v; + + TEST_START("modff4"); + fraction_v = modfd2(x0_v, &integer_v); + TEST_CHECK("20040916170647EJL", allequal_double2( integer_v, x0i_v ) && allequal_double2( fraction_v, x0f_v ), 0); + fraction_v = modfd2(x1_v, &integer_v); + TEST_CHECK("20040916170650EJL", allequal_double2( integer_v, x1i_v ) && allequal_double2( fraction_v, x1f_v ), 0); + fraction_v = modfd2(x2_v, &integer_v); + TEST_CHECK("20040916170653EJL", allequal_double2( integer_v, x2i_v ) && allequal_double2( fraction_v, x2f_v ), 0); + fraction_v = modfd2(x3_v, &integer_v); + 
TEST_CHECK("20040916170656EJL", allequal_double2( integer_v, x3i_v ) && allequal_double2( fraction_v, x3f_v ), 0); + fraction_v = modfd2(x4_v, &integer_v); + TEST_CHECK("20040916170658EJL", allequal_double2( integer_v, x4i_v ) && allequal_double2( fraction_v, x4f_v ), 0); + fraction_v = modfd2(x5_v, &integer_v); + TEST_CHECK("20040916170701EJL", allequal_double2( integer_v, x5i_v ) && allequal_double2( fraction_v, x5f_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/modff4.c b/Extras/simdmathlibrary/spu/tests/modff4.c new file mode 100644 index 000000000..df547a906 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/modff4.c @@ -0,0 +1,124 @@ +/* Test modff4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040916170642EJL", "EJL", "modff"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. + + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x0f = hide_float(0.91825f); + + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x1f = hide_float(-0.12958f); + + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x2f = hide_float(-0.1875f); + + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x3f = hide_float(0.5f); + + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x4f = hide_float(0.0f); + + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + float x5f = hide_float(0.0f); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 x0i_v = spu_splats(x0i); + vec_float4 x0f_v = spu_splats(x0f); + vec_float4 x1_v = spu_splats(x1); + vec_float4 x1i_v = spu_splats(x1i); + vec_float4 x1f_v = spu_splats(x1f); + vec_float4 x2_v = spu_splats(x2); + vec_float4 x2i_v = spu_splats(x2i); + vec_float4 x2f_v = spu_splats(x2f); + vec_float4 x3_v = 
spu_splats(x3); + vec_float4 x3i_v = spu_splats(x3i); + vec_float4 x3f_v = spu_splats(x3f); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4i_v = spu_splats(x4i); + vec_float4 x4f_v = spu_splats(x4f); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5i_v = spu_splats(x5i); + vec_float4 x5f_v = spu_splats(x5f); + + float integer, fraction; + vec_float4 integer_v, fraction_v; + + TEST_START("modff4"); + fraction_v = modff4(x0_v, &integer_v); + TEST_CHECK("20040916170647EJL", allequal_float4( integer_v, x0i_v ) && allequal_float4( fraction_v, x0f_v ), 0); + fraction_v = modff4(x1_v, &integer_v); + TEST_CHECK("20040916170650EJL", allequal_float4( integer_v, x1i_v ) && allequal_float4( fraction_v, x1f_v ), 0); + fraction_v = modff4(x2_v, &integer_v); + TEST_CHECK("20040916170653EJL", allequal_float4( integer_v, x2i_v ) && allequal_float4( fraction_v, x2f_v ), 0); + fraction_v = modff4(x3_v, &integer_v); + TEST_CHECK("20040916170656EJL", allequal_float4( integer_v, x3i_v ) && allequal_float4( fraction_v, x3f_v ), 0); + fraction_v = modff4(x4_v, &integer_v); + TEST_CHECK("20040916170658EJL", allequal_float4( integer_v, x4i_v ) && allequal_float4( fraction_v, x4f_v ), 0); + fraction_v = modff4(x5_v, &integer_v); + TEST_CHECK("20040916170701EJL", allequal_float4( integer_v, x5i_v ) && allequal_float4( fraction_v, x5f_v ), 0); + + TEST_START("modff"); + fraction = modff(x0, &integer); + TEST_CHECK("20040916170704EJL", integer == x0i && fraction == x0f, 0); + fraction = modff(x1, &integer); + TEST_CHECK("20040916170706EJL", integer == x1i && fraction == x1f, 0); + fraction = modff(x2, &integer); + TEST_CHECK("20040916170709EJL", integer == x2i && fraction == x2f, 0); + fraction = modff(x3, &integer); + TEST_CHECK("20040916170711EJL", integer == x3i && fraction == x3f, 0); + fraction = modff(x4, &integer); + TEST_CHECK("20040916170714EJL", integer == x4i && fraction == x4f, 0); + fraction = modff(x5, &integer); + TEST_CHECK("20040916170716EJL", integer == x5i && fraction 
== x5f, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/nearbyintd2.c b/Extras/simdmathlibrary/spu/tests/nearbyintd2.c new file mode 100644 index 000000000..e15a1f6a0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/nearbyintd2.c @@ -0,0 +1,178 @@ +/* Test nearbyintd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ +/** + * + *@@ nearbyintd2 - Round the input to the nearest integer according to + * the current rounding mode without raising an inexact exception. + * + *@brief + * boundary test for nearbyintd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + **/ + + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int ans0[2]; + unsigned long long int ans1[2]; + unsigned long long int ans2[2]; + unsigned long long int ans3[2]; +} TestVec64_NerI; + +int main() +{ + TestVec64_NerI test_a[] = { + { + // zero + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // border + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL} + },{ + // MIN , MAX + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL} + },{ + // denotmalized + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x3ff0000000000000ULL}, + 
{0xbff0000000000000ULL,0x0000000000000000ULL} + },{ + // denotmalized + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x3ff0000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0xbff0000000000000ULL} + },{ + // 1.0 + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL} + },{ + // 1.5 + {0x3ff8000000000000ULL,0xbff8000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x4000000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xc000000000000000ULL} + },{ + // 2.5 + {0x4004000000000000ULL,0xc004000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x4008000000000000ULL,0xc000000000000000ULL}, + {0x4000000000000000ULL,0xc008000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060829200000NM","NM", "nearbyintd2"); + + TEST_START("nearbyintd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + spu_mtfpscr(((vec_uint4){0x0000,0,0,0})); + res_v = nearbyintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006082920%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans0[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0500,0,0,0})); + res_v = nearbyintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006082920%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans1[0])), 0); + + 
spu_mtfpscr(((vec_uint4){0x0a00,0,0,0})); + res_v = nearbyintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006082920%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans2[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0f00,0,0,0})); + res_v = nearbyintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006082920%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans3[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/nearbyintf4.c b/Extras/simdmathlibrary/spu/tests/nearbyintf4.c new file mode 100644 index 000000000..ca6a72bf0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/nearbyintf4.c @@ -0,0 +1,95 @@ +/* Test nearbyintf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#define DEFINE_DATA(var,a,b) \ + float var = hide_float(a); \ + float var ## _out = hide_float(b); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +/* + */ +#define DEFINE_DATA_UNSIGNED(var,a,b)\ + unsigned int var ## _ina = a ; \ + unsigned int var ## _inb = b ; \ + float var = make_float(var ## _ina); \ + float var ## _out = make_float(var ## _inb); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +#define DO_TEST(var,id) \ + res_v = nearbyintf4(var ## _v); \ + TEST_CHECK(" #id ", allequal_float4( res_v, var ## _out_v ), 0); + + +int main() +{ + vec_float4 res_v; + + TEST_SET_START("625018616200","NBI", "nearbyintf4"); + + + + + //s=0, e=100, f=7fffff --> s=0, e=100, f=7fffff + DEFINE_DATA_UNSIGNED(x1,0x71ffffff,0x71ffffff) + //s=0, e=22, f=0x7fffff --> s=0,e=22,f=0x7ffffe + DEFINE_DATA_UNSIGNED(x2, 0x4affffff,0x4afffffe) + //s=0, e=23, f=0 --> s=0,e=23,f=0 + DEFINE_DATA_UNSIGNED(x3, 0x4b000000,0x4b000000) + //s=0, e=-126, f=0 --> 0 + DEFINE_DATA_UNSIGNED(x4, 0x800000,0x0) + DEFINE_DATA(x5, 1.001f, 1.f) + DEFINE_DATA(x6, -.05f, 0.f) + DEFINE_DATA(x7, 0.9999f, 0.f) + DEFINE_DATA(x8, 0.4999f, 0.f) + + TEST_START("nearbyintf4"); + DO_TEST(x1,625018616201NBI) + DO_TEST(x2,625018616202NBI) + DO_TEST(x3,625018616203NBI) + 
DO_TEST(x4,625018616204NBI) + DO_TEST(x5,625018616205NBI) + DO_TEST(x6,625018616206NBI) + DO_TEST(x7,625018616207NBI) + DO_TEST(x8,625018616208NBI) + TEST_SET_DONE(); + + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/negated2.c b/Extras/simdmathlibrary/spu/tests/negated2.c new file mode 100644 index 000000000..777db15af --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/negated2.c @@ -0,0 +1,83 @@ +/* Test negated2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040930102626EJL","EJL", "negated2"); + + double x0n = hide_double(-0.0); + double x0p = hide_double(0.0); + double x1n = hide_double(-83532.96153153); + double x1p = hide_double(83532.96153153); + double x2n = hide_double(-0.0000000013152); + double x2p = hide_double(0.0000000013152); + double x3n = hide_double(-1.0/0.0); + double x3p = hide_double(1.0/0.0); + + vec_double2 x0n_v = spu_splats(x0n); + vec_double2 x0p_v = spu_splats(x0p); + vec_double2 x1n_v = spu_splats(x1n); + vec_double2 x1p_v = spu_splats(x1p); + vec_double2 x2n_v = spu_splats(x2n); + vec_double2 x2p_v = spu_splats(x2p); + vec_double2 x3n_v = spu_splats(x3n); + vec_double2 x3p_v = spu_splats(x3p); + + vec_double2 res_v; + + TEST_START("negated2"); + res_v = negated2(x0n_v); + TEST_CHECK("20040930102629EJL", allequal_double2( res_v, x0p_v ), 0); + res_v = negated2(x0p_v); + TEST_CHECK("20040930102631EJL", allequal_double2( res_v, x0n_v ), 0); + res_v = negated2(x1n_v); + TEST_CHECK("20040930102632EJL", allequal_double2( res_v, x1p_v ), 0); + res_v = negated2(x1p_v); + TEST_CHECK("20040930102635EJL", allequal_double2( res_v, x1n_v ), 0); + res_v = negated2(x2n_v); + TEST_CHECK("20040930102637EJL", allequal_double2( res_v, x2p_v ), 0); + res_v = negated2(x2p_v); + TEST_CHECK("20040930102639EJL", allequal_double2( res_v, x2n_v ), 0); + 
res_v = negated2(x3n_v); + TEST_CHECK("20040930102641EJL", allposinf_double2( res_v ), 0); + res_v = negated2(x3p_v); + TEST_CHECK("20040930102643EJL", allneginf_double2( res_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/negatef4.c b/Extras/simdmathlibrary/spu/tests/negatef4.c new file mode 100644 index 000000000..cb9310e43 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/negatef4.c @@ -0,0 +1,87 @@ +/* Test negatef4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040930102649EJL","EJL", "negatef4"); + + unsigned int i3n = 0xffffffff; + unsigned int i3p = 0x7fffffff; + + float x0n = hide_float(-0.0f); + float x0p = hide_float(0.0f); + float x1n = hide_float(-83532.96153153f); + float x1p = hide_float(83532.96153153f); + float x2n = hide_float(-0.0000000013152f); + float x2p = hide_float(0.0000000013152f); + float x3n = hide_float(make_float(i3n)); + float x3p = hide_float(make_float(i3p)); + + vec_float4 x0n_v = spu_splats(x0n); + vec_float4 x0p_v = spu_splats(x0p); + vec_float4 x1n_v = spu_splats(x1n); + vec_float4 x1p_v = spu_splats(x1p); + vec_float4 x2n_v = spu_splats(x2n); + vec_float4 x2p_v = spu_splats(x2p); + vec_float4 x3n_v = spu_splats(x3n); + vec_float4 x3p_v = spu_splats(x3p); + + vec_float4 res_v; + + TEST_START("negatef4"); + res_v = negatef4(x0n_v); + TEST_CHECK("20040930102652EJL", allequal_float4( res_v, x0p_v ), 0); + res_v = negatef4(x0p_v); + TEST_CHECK("20040930102653EJL", allequal_float4( res_v, x0n_v ), 0); + res_v = negatef4(x1n_v); + TEST_CHECK("20040930102655EJL", allequal_float4( res_v, x1p_v ), 0); + res_v = negatef4(x1p_v); + TEST_CHECK("20040930102657EJL", allequal_float4( res_v, x1n_v ), 0); + res_v = negatef4(x2n_v); + TEST_CHECK("20040930102659EJL", allequal_float4( res_v, x2p_v ), 0); + res_v = negatef4(x2p_v); + 
TEST_CHECK("20040930102701EJL", allequal_float4( res_v, x2n_v ), 0); + res_v = negatef4(x3n_v); + TEST_CHECK("20040930102703EJL", allequal_float4( res_v, x3p_v ), 0); + res_v = negatef4(x3p_v); + TEST_CHECK("20040930102705EJL", allequal_float4( res_v, x3n_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/negatei4.c b/Extras/simdmathlibrary/spu/tests/negatei4.c new file mode 100644 index 000000000..5ef6df510 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/negatei4.c @@ -0,0 +1,83 @@ +/* Test negatei4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040930102649EJL","EJL", "negatei4"); + + int x0n = hide_int(0); + int x0p = hide_int(0); + int x1n = hide_int(-83532); + int x1p = hide_int(83532); + int x2n = hide_int(-13152); + int x2p = hide_int(13152); + int x3n = hide_int(-1); + int x3p = hide_int(1); + + vec_int4 x0n_v = spu_splats(x0n); + vec_int4 x0p_v = spu_splats(x0p); + vec_int4 x1n_v = spu_splats(x1n); + vec_int4 x1p_v = spu_splats(x1p); + vec_int4 x2n_v = spu_splats(x2n); + vec_int4 x2p_v = spu_splats(x2p); + vec_int4 x3n_v = spu_splats(x3n); + vec_int4 x3p_v = spu_splats(x3p); + + vec_int4 res_v; + + TEST_START("negatei4"); + res_v = negatei4(x0n_v); + TEST_CHECK("20040930102652EJL", allequal_int4( res_v, x0p_v ), 0); + res_v = negatei4(x0p_v); + TEST_CHECK("20040930102653EJL", allequal_int4( res_v, x0n_v ), 0); + res_v = negatei4(x1n_v); + TEST_CHECK("20040930102655EJL", allequal_int4( res_v, x1p_v ), 0); + res_v = negatei4(x1p_v); + TEST_CHECK("20040930102657EJL", allequal_int4( res_v, x1n_v ), 0); + res_v = negatei4(x2n_v); + TEST_CHECK("20040930102659EJL", allequal_int4( res_v, x2p_v ), 0); + res_v = negatei4(x2p_v); + TEST_CHECK("20040930102701EJL", allequal_int4( res_v, x2n_v ), 0); + res_v = negatei4(x3n_v); + TEST_CHECK("20040930102703EJL", allequal_int4( res_v, x3p_v ), 0); + res_v = negatei4(x3p_v); + 
TEST_CHECK("20040930102705EJL", allequal_int4( res_v, x3n_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/negatell2.c b/Extras/simdmathlibrary/spu/tests/negatell2.c new file mode 100644 index 000000000..76d346f20 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/negatell2.c @@ -0,0 +1,86 @@ +/* Test negatell2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060731203500MH","MH", "negatell2"); + + vec_llong2 x0n_v = spu_splats(0LL); + vec_llong2 x0p_v = spu_splats(0LL); + vec_llong2 x1n_v = spu_splats(-83582LL); + vec_llong2 x1p_v = spu_splats(83582LL); + vec_llong2 x2n_v = spu_splats(-13152LL); + vec_llong2 x2p_v = spu_splats(13152LL); + vec_llong2 x3n_v = spu_splats(-1LL); + vec_llong2 x3p_v = spu_splats(1LL); + vec_llong2 x4n_v = spu_splats((long long)0x8000000000000001ULL); + vec_llong2 x4p_v = spu_splats(0x7fffffffffffffffLL); + vec_llong2 x5n_v = spu_splats((long long)0x8000000100000000ULL); + vec_llong2 x5p_v = spu_splats(0x7fffffff00000000LL); + + vec_llong2 res_v; + + TEST_START("negatell2"); + res_v = negatell2(x0n_v); + TEST_CHECK("20060731203501MH", allequal_llong2( res_v, x0p_v ), 0); + res_v = negatell2(x0p_v); + TEST_CHECK("20060731203502MH", allequal_llong2( res_v, x0n_v ), 0); + res_v = negatell2(x1n_v); + TEST_CHECK("20060731203503MH", allequal_llong2( res_v, x1p_v ), 0); + res_v = negatell2(x1p_v); + TEST_CHECK("20060731203504MH", allequal_llong2( res_v, x1n_v ), 0); + res_v = negatell2(x2n_v); + TEST_CHECK("20060731203505MH", allequal_llong2( res_v, x2p_v ), 0); + res_v = negatell2(x2p_v); + TEST_CHECK("20060731203506MH", allequal_llong2( res_v, x2n_v ), 0); + res_v = negatell2(x3n_v); + TEST_CHECK("20060731203507MH", allequal_llong2( res_v, x3p_v ), 0); + res_v = negatell2(x3p_v); + TEST_CHECK("20060731203508MH", allequal_llong2( res_v, x3n_v ), 0); + res_v = negatell2(x4n_v); + TEST_CHECK("20060731203509MH", allequal_llong2( res_v, x4p_v ), 0); + res_v = negatell2(x4p_v); + TEST_CHECK("20060731203510MH", allequal_llong2( res_v, x4n_v ), 0); + res_v = negatell2(x5n_v); + TEST_CHECK("20060731203511MH", allequal_llong2( res_v, x5p_v ), 0); + res_v = negatell2(x5p_v); + TEST_CHECK("20060731203512MH", allequal_llong2( res_v, x5n_v ), 0); + + 
TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/nextafterd2.c b/Extras/simdmathlibrary/spu/tests/nextafterd2.c new file mode 100644 index 000000000..cb409783c --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/nextafterd2.c @@ -0,0 +1,153 @@ +/* Test nextafterd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ nextafterd2 - find next representable floating-point value towards 2nd param. 
+ * + *@brief + * boundary test for nextafterd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + * + **/ + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int yyy[2]; + unsigned long long int ans[2]; +} TestVec64_NexA; + +int main() +{ + TestVec64_NexA test_a[] = { + { + // -1 -> -0 , 0 -> -1 + {0x8000000000000001ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x8000000000000001ULL}, + {0x8000000000000000ULL,0x8000000000000001ULL} + },{ + // -1 -> -0 , 0 -> -1 + {0x8000000000000001ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000001ULL}, + {0x8000000000000000ULL,0x8000000000000001ULL} + },{ + // 0 -> -0 , -0 -> 0 + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL} + },{ + // -Inf -> MIN , Inf -> MAX + {0xFFF0000000000000ULL,0x7FF0000000000000ULL}, + {0x0010000000000000ULL,0x0000000000000000ULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL} + },{ + // MAX -> Inf , MIN -> -Inf + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL} + },{ + // Denormalize -> Denormalize + {0x0000000000000001ULL,0x8000000000000010ULL}, + {0x0000000000000003ULL,0x8000000000000020ULL}, + {0x0000000000000002ULL,0x8000000000000011ULL} + },{ + // Denormalize -> Normalize + {0x000FFFFFFFFFFFFFULL,0x800FFFFFFFFFFFFFULL}, + {0x0020000000000000ULL,0x8020000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL} + },{ + // Normalize -> Denormalize + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x8010000000000000ULL,0x0020000000000000ULL}, + {0x000FFFFFFFFFFFFFULL,0x800FFFFFFFFFFFFFULL} + },{ + // equal + {0x8FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL}, + 
{0x8FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL}, + {0x8FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL} + },{ + // + {0x8FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL}, + {0x9FFFFFFFFFFFFFFFULL,0x1FFFFFFFFFFFFFFFULL}, + {0x9000000000000000ULL,0x1000000000000000ULL} + },{ + // + {0x7000000000000000ULL,0xF000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x6FFFFFFFFFFFFFFFULL,0xEFFFFFFFFFFFFFFFULL} + },{ + // Nan + {0x7000000000000000ULL,0xF000000000000000ULL}, + {0xFFF0000000000001ULL,0x7FF0000000000001ULL}, + {0xFFF0000000000001ULL,0x7FF0000000000001ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060828130000NM","NM", "nextafterd2"); + + TEST_START("nextafterd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + res_v = nextafterd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_double2 *)&test_a[ii].yyy[0])); + sprintf(msg,"2006082813%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/nextafterf4.c b/Extras/simdmathlibrary/spu/tests/nextafterf4.c new file mode 100644 index 000000000..c6cea845f --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/nextafterf4.c @@ -0,0 +1,162 @@ +/* Test nextafterf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +//a :float, b: float: c:bit pattern +#define DEFINE_DATA(var,a,b,c) \ + float var ## _a = a;\ +vec_float4 var ## _a_v = spu_splats(var ## _a); \ + float var ## _b = b;\ +vec_float4 var ## _b_v = spu_splats(var ## _b); \ + unsigned int var ## _inc = c ; \ + float var ## _out = make_float(var ## _inc);\ + vec_float4 var ## _out_v = spu_splats(var ## _out);\ + +//a :bit pattern, b: bit pattern: c:bit pattern +#define DEFINE_DATA_UNSIGNED(var,a,b,c) \ + unsigned int var ## _ina = a ; \ + float var ## _a = make_float(var ## _ina); \ + vec_float4 var ## _a_v = spu_splats(var ## _a); \ + unsigned int var ## _inb = b ; \ + float var ## _b = make_float(var ## _inb); \ + vec_float4 var ## _b_v = spu_splats(var ## _b); \ + unsigned int var ## _inc = c ; \ + float var ## _out = make_float(var ## _inc); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +#define DO_TEST(var,id) \ + res_v = nextafterf4(var ## _a_v, var ## _b_v);\ + TEST_CHECK(" #id ", allequal_float4( res_v, var ## _out_v ), 0); + + +int main() +{ + vec_float4 res_v; + + TEST_SET_START("958726589700","NAR", "nextafterf4"); + + + + // == + // 1.0f --> 1.0f + DEFINE_DATA(x1, 1.0f, 1.0f, 0x3f800000) + DEFINE_DATA(x2, 0.0f, 0.0f, 0x0) + + // * Icrement * + + // -FLT_MAX -> + DEFINE_DATA_UNSIGNED(x3,0xffffffff, 0x0, 0xfffffffe) + //(1, 40, 0) --> (1, 39, 7fffff) + DEFINE_DATA_UNSIGNED(x4,0xd3800000, 0x0, 0xd37fffff) + // (1,-40,0 ) --> (1,-41,0x7fffff) + DEFINE_DATA_UNSIGNED(x5,0xab800000, 0x0, 0xab7fffff) + //-FLT_MIN --> 0 + DEFINE_DATA_UNSIGNED(x6,0x80800000, 0x0, 0x0) + //0.0f --> FLT_MIN + DEFINE_DATA(x7, 0.0f, 1.0f, 0x800000) + //-0.0f --> FLT_MIN + DEFINE_DATA_UNSIGNED(x8, 0x80000000, 0x7fffffff, 0x800000) + //FLT_MIN --> + DEFINE_DATA_UNSIGNED(x9, 0x800000, 0x7fffffff, 0x800001) + // (0, -41, 7fffff) --> (0, -40, 0) + DEFINE_DATA_UNSIGNED(x10, 0x2b7fffff, 0x7fffffff, 
0x2b800000) + // (0, 40, 7fffff) --> (0, 41, 0) + DEFINE_DATA_UNSIGNED(x11, 0x53ffffff, 0x7fffffff, 0x54000000) + // FLT_MAX --> + DEFINE_DATA_UNSIGNED(x12,0x7fffffff,0x7fffffff,0x7fffffff) + + // * Decrement * + + // FLT_MAX --> FLT_MAX + DEFINE_DATA_UNSIGNED(x13,0x7fffffff,0x7fffffff,0x7fffffff) + // FLT_MAX --> + DEFINE_DATA_UNSIGNED(x14,0x7fffffff,0x0,0x7ffffffe) + // (0, 41, 0) --> (0, 40, 7fffff) + DEFINE_DATA_UNSIGNED(x15, 0x54000000, 0x0, 0x53ffffff) + // (0, -40, 0) --> (0, -41, 7fffff) + DEFINE_DATA_UNSIGNED(x16, 0x2b800000,0x0, 0x2b7fffff) + // -> FLT_MIN + DEFINE_DATA_UNSIGNED(x17, 0x800001, 0x800000, 0x800000) + // FLT_MIN --> 0 + DEFINE_DATA_UNSIGNED(x18, 0x800000, 0x0, 0x0) + // 0.0 -> -FLT_MIN + DEFINE_DATA_UNSIGNED(x19, 0x0, 0xffffffff, 0x80800000) + // -0.0 -> FLT_MIN + DEFINE_DATA_UNSIGNED(x20, 0x80000000, 0xffffffff, 0x80800000) + //-FLT_MIN --> + DEFINE_DATA_UNSIGNED(x21, 0x80800000, 0xffffffff, 0x80800001) + // (1,-41,0x7fffff) --> (1,-40,0 ) + DEFINE_DATA_UNSIGNED(x22, 0xab7fffff, 0xffffffff, 0xab800000) + //(1, 40, 0) --> (1, 39, 7fffff) + DEFINE_DATA_UNSIGNED(x23, 0xd37fffff, 0xffffffff, 0xd3800000) + // --> -FLT_MAX + DEFINE_DATA_UNSIGNED(x24,0xfffffffe, 0xffffffff, 0xffffffff) + + + //TEST + TEST_START("nextafterf4"); + DO_TEST(x1,958726589701NAR) + DO_TEST(x2,958726589702NAR) + DO_TEST(x3,958726589703NAR) + DO_TEST(x4,958726589704NAR) + DO_TEST(x5,958726589705NAR) + DO_TEST(x6,958726589706NAR) + DO_TEST(x7,958726589707NAR) + DO_TEST(x8,958726589708NAR) + DO_TEST(x9,958726589709NAR) + DO_TEST(x10,958726589710NAR) + DO_TEST(x11,958726589711NAR) + DO_TEST(x12,958726589712NAR) + DO_TEST(x13,958726589713NAR) + DO_TEST(x14,958726589714NAR) + DO_TEST(x15,958726589715NAR) + DO_TEST(x16,958726589716NAR) + DO_TEST(x17,958726589717NAR) + DO_TEST(x18,958726589718NAR) + DO_TEST(x19,958726589719NAR) + DO_TEST(x20,958726589720NAR) + DO_TEST(x21,958726589721NAR) + DO_TEST(x22,958726589722NAR) + DO_TEST(x23,958726589723NAR) + DO_TEST(x24,958726589724NAR) 
+ + TEST_SET_DONE(); + + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/recipd2.c b/Extras/simdmathlibrary/spu/tests/recipd2.c new file mode 100644 index 000000000..6d14ed53d --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/recipd2.c @@ -0,0 +1,131 @@ +/* Test recipd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040920095218EJL","EJL", "recipd2"); + + unsigned long long i6 = 0x7fd0000000000001ull; // 2^1022 + 1 ulp + unsigned long long i7 = 0xffd0000000000000ull; // -2^1022 + unsigned long long i7r = 0x8010000000000000ull; // -2^-1022 + unsigned long long i8 = 0x7606a4533cf5605eull; // random values + unsigned long long i8r = 0x09d69cea2b5b5b57ull; + unsigned long long i9 = 0x4c042c295376566eull; + unsigned long long i9r = 0x33d9618e87b961f4ull; + unsigned long long i10 = 0x39b3720562510408ull; + unsigned long long i10r = 0x462a54842d7f9b5dull; + unsigned long long i11 = 0x6911a64538a389aeull; + unsigned long long i11r = 0x16cd02637ed13ff2ull; + unsigned long long i12 = 0x1ac4d062d451c99dull; + unsigned long long i12r = 0x6518994c26ebbb3eull; + + double x0 = hide_double(-1.0/0.0); // -Inf + double x1 = hide_double(1.0/0.0); // Inf + double x2 = hide_double(0.0); // 0 + double x3 = hide_double(-0.0); // -0 + double x4 = hide_double(0.0/0.0); // NaN + double x5 = hide_double(2.0); + double x5r = hide_double(0.5); + double x6 = hide_double(make_double(i6)); + double x7 = hide_double(make_double(i7)); + double x7r = hide_double(make_double(i7r)); + double x8 = hide_double(make_double(i8)); + double x8r = hide_double(make_double(i8r)); + double x9 = hide_double(make_double(i9)); + double x9r = hide_double(make_double(i9r)); + double x10 = hide_double(make_double(i10)); + double x10r = hide_double(make_double(i10r)); + double x11 = hide_double(make_double(i11)); + double x11r = hide_double(make_double(i11r)); + double x12 = hide_double(make_double(i12)); + double x12r = hide_double(make_double(i12r)); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 x1_v = spu_splats(x1); + vec_double2 x2_v = spu_splats(x2); + vec_double2 x3_v = spu_splats(x3); + vec_double2 x4_v = spu_splats(x4); + vec_double2 x5_v = spu_splats(x5); + 
vec_double2 x5r_v = spu_splats(x5r); + vec_double2 x6_v = spu_splats(x6); + vec_double2 x7_v = spu_splats(x7); + vec_double2 x7r_v = spu_splats(x7r); + vec_double2 x8_v = spu_splats(x8); + vec_double2 x8r_v = spu_splats(x8r); + vec_double2 x9_v = spu_splats(x9); + vec_double2 x9r_v = spu_splats(x9r); + vec_double2 x10_v = spu_splats(x10); + vec_double2 x10r_v = spu_splats(x10r); + vec_double2 x11_v = spu_splats(x11); + vec_double2 x11r_v = spu_splats(x11r); + vec_double2 x12_v = spu_splats(x12); + vec_double2 x12r_v = spu_splats(x12r); + + vec_double2 res_v; + + TEST_START("recipd2"); + res_v = recipd2(x0_v); + TEST_CHECK("20040920095224EJL", allnegzero_double2( res_v ), 0); + res_v = recipd2(x1_v); + TEST_CHECK("20040920095226EJL", allposzero_double2( res_v ), 0); + res_v = recipd2(x2_v); + TEST_CHECK("20040920095228EJL", allposinf_double2( res_v ), 0); + res_v = recipd2(x3_v); + TEST_CHECK("20040920095233EJL", allneginf_double2( res_v ), 0); + res_v = recipd2(x4_v); + TEST_CHECK("20040920095235EJL", allnan_double2( res_v ), 0); + res_v = recipd2(x5_v); + TEST_CHECK("20040920095237EJL", allequal_double2( res_v, x5r_v ), 0); + res_v = recipd2(x6_v); + TEST_CHECK("20040920095239EJL", allzerodenorm_double2( res_v ), 0); + res_v = recipd2(x7_v); + TEST_CHECK("20040920095242EJL", allequal_double2( res_v, x7r_v ), 0); + res_v = recipd2(x8_v); + TEST_CHECK("20040920095245EJL", allequal_ulps_double2( res_v, x8r_v, 1 ), 0); + res_v = recipd2(x9_v); + TEST_CHECK("20040920095247EJL", allequal_ulps_double2( res_v, x9r_v, 1 ), 0); + res_v = recipd2(x10_v); + TEST_CHECK("20040920095248EJL", allequal_ulps_double2( res_v, x10r_v, 1 ), 0); + res_v = recipd2(x11_v); + TEST_CHECK("20040920095250EJL", allequal_ulps_double2( res_v, x11r_v, 1 ), 0); + res_v = recipd2(x12_v); + TEST_CHECK("20040920095252EJL", allequal_ulps_double2( res_v, x12r_v, 1 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/recipf4.c 
b/Extras/simdmathlibrary/spu/tests/recipf4.c new file mode 100644 index 000000000..eb252c14c --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/recipf4.c @@ -0,0 +1,114 @@ +/* Test recipf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040920142553EJL","EJL", "recipf4"); + + unsigned int i0r = 0x7fffffff; + unsigned int i1 = 0xff000000; // -2^127 + unsigned int i2 = 0xfe7fffff; // -2^126 - 1 ulp + unsigned int i2r = 0x80800001; + unsigned int i3 = 0x75013340; // random values + unsigned int i3r = 0x09fd9f35; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x090d9277; + unsigned int i5 = 0x4c7fed5a; + unsigned int i5r = 0x32800954; + unsigned int i6 = 0x3a0731f0; + unsigned int i6r = 0x44f2602e; + unsigned int i7 = 0x69784a07; + unsigned int i7r = 0x1583f9a3; + + float x0 = hide_float(0.0f); + float x0r = hide_float(make_float(i0r)); + float x1 = hide_float(make_float(i1)); + float x1r = hide_float(0.0f); + float x2 = hide_float(make_float(i2)); + float x2r = hide_float(make_float(i2r)); + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 x0r_v = spu_splats(x0r); + vec_float4 x1_v = spu_splats(x1); + vec_float4 x1r_v = spu_splats(x1r); + vec_float4 x2_v = spu_splats(x2); + vec_float4 x2r_v = spu_splats(x2r); + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3r_v = spu_splats(x3r); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4r_v = spu_splats(x4r); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5r_v = spu_splats(x5r); + vec_float4 x6_v = spu_splats(x6); + vec_float4 x6r_v = spu_splats(x6r); + vec_float4 x7_v = spu_splats(x7); + vec_float4 x7r_v = spu_splats(x7r); + + vec_float4 res_v; + + TEST_START("recipf4"); + res_v = 
recipf4(x0_v); + TEST_CHECK("20040920142558EJL", allequal_float4( res_v, x0r_v ), 0); + res_v = recipf4(x1_v); + TEST_CHECK("20040920142600EJL", allequal_float4( res_v, x1r_v), 0); + res_v = recipf4(x2_v); + TEST_CHECK("20040920142602EJL", allequal_ulps_float4( res_v, x2r_v, 2 ), 0); + res_v = recipf4(x3_v); + TEST_CHECK("20040920142604EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = recipf4(x4_v); + TEST_CHECK("20040920142606EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = recipf4(x5_v); + TEST_CHECK("20040920142608EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = recipf4(x6_v); + TEST_CHECK("20040920142609EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = recipf4(x7_v); + TEST_CHECK("20040920142611EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/remainderd2.c b/Extras/simdmathlibrary/spu/tests/remainderd2.c new file mode 100644 index 000000000..8100c9ee7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/remainderd2.c @@ -0,0 +1,125 @@ +/* Test remainderd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int yyy[2]; + unsigned long long int a_res[2]; +} TestVec64_RemqD; + +int main() +{ + TestVec64_RemqD test_a[] = { + { + // normal 2.5/1.5 29/3 + {0x4004000000000000ULL,0x403d000000000000ULL}, + {0x3ff8000000000000ULL,0x4008000000000000ULL}, + {0xbfe0000000000000ULL,0xbff0000000000000ULL} + },{ + // normal + {0x09d0000000000006ULL,0x1000000000000000ULL}, + {0x8010000000000005ULL,0x0010000000000007ULL}, + {0x800000000000007dULL,0x80037ffffffff1a5ULL} + },{ + // denorm + {0x0000000000000001ULL,0x800ffffffffffff3ULL}, + {0x8000000000000001ULL,0x8000000000000005ULL}, + {0x0000000000000000ULL,0x0000000000000002ULL} + },{ + // divide by inf + {0xFFEFFFFFFFFFFFFFULL,0x0001000000000000ULL}, + {0x7FF0000000000000ULL,0x7FF0000000000000ULL}, + {0xFFEFFFFFFFFFFFFFULL,0x0001000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + TestVec64_RemqD test_b[] = { + { + // divide by zero -> nan + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + 
{0x7ff8000000000000ULL,0x7ff8000000000000ULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7ff8000000000000ULL,0x7ff8000000000000ULL} + },{ + // border + {0xFFE0000000000000ULL,0x7FEFFFFFFFFFFFFFULL}, + {0x0008000000000000ULL,0x0010000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060919210000NM","NM", "remquod2"); + + TEST_START("remquod2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + res_v = remainderd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_double2 *)&test_a[ii].yyy[0])); + sprintf(msg,"2006092621%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].a_res[0])), 0); + } + for (ii=0; ; ii++) { + if ( (test_b[ii].xxx[0] == 0) && (test_b[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + res_v = remainderd2 (*((vec_double2 *)&test_b[ii].xxx[0]), *((vec_double2 *)&test_b[ii].yyy[0])); + sprintf(msg,"2006092623%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_b[ii].a_res[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/remainderf4.c b/Extras/simdmathlibrary/spu/tests/remainderf4.c new file mode 100644 index 000000000..7f09e0d42 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/remainderf4.c @@ -0,0 +1,143 @@ +/* Test remainderf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040928191927EJL","EJL", "remainderf4"); + + unsigned int i0n = 0x449edbc6; + unsigned int i0d = 0x40cf799d; + unsigned int i0r = 0x3daa7300; + unsigned int i1n = 0x6bca107a; + unsigned int i1d = 0x6c4a107a; + unsigned int i1r = 0x6bca107a; + unsigned int i2n = 0x1c123605; + unsigned int i2d = 0x1c923602; + unsigned int i2r = 0x9c1235ff; + unsigned int i3n = 0x2b4c50fa; + unsigned int i3d = 0x253a3ae3; + unsigned int i3r = 0xa41873a8; + unsigned int i4n = 0x73addffc; + unsigned int i4d = 0x742ddffc; + unsigned int i4r = 0x73addffc; + unsigned int i5n = 0x29d4d97c; + unsigned int i5d = 0x2a546e77; + unsigned int i5r = 0xa9d40372; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); + float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = spu_splats(x0n); + vec_float4 x0d_v = spu_splats(x0d); + vec_float4 x0r_v = spu_splats(x0r); + + vec_float4 x1n_v = spu_splats(x1n); + vec_float4 x1d_v = spu_splats(x1d); + vec_float4 x1r_v = spu_splats(x1r); + + vec_float4 x2n_v = spu_splats(x2n); + vec_float4 x2d_v = spu_splats(x2d); + vec_float4 x2r_v = spu_splats(x2r); + + vec_float4 x3n_v = spu_splats(x3n); + vec_float4 x3d_v = spu_splats(x3d); + 
vec_float4 x3r_v = spu_splats(x3r); + + vec_float4 x4n_v = spu_splats(x4n); + vec_float4 x4d_v = spu_splats(x4d); + vec_float4 x4r_v = spu_splats(x4r); + + vec_float4 x5n_v = spu_splats(x5n); + vec_float4 x5d_v = spu_splats(x5d); + vec_float4 x5r_v = spu_splats(x5r); + + float res; + vec_float4 res_v; + + TEST_START("remainderf4"); + res_v = remainderf4(x0n_v, x0d_v); + TEST_CHECK("20040928191931EJL", allequal_ulps_float4( res_v, x0r_v, 1 ), 0); + res_v = remainderf4(x1n_v, x1d_v); + TEST_CHECK("20040928191933EJL", allequal_ulps_float4( res_v, x1r_v, 1 ), 0); + res_v = remainderf4(x2n_v, x2d_v); + TEST_CHECK("20040928191934EJL", allequal_ulps_float4( res_v, x2r_v, 1 ), 0); + res_v = remainderf4(x3n_v, x3d_v); + TEST_CHECK("20040928191936EJL", allequal_ulps_float4( res_v, x3r_v, 1 ), 0); + res_v = remainderf4(x4n_v, x4d_v); + TEST_CHECK("20040928191937EJL", allequal_ulps_float4( res_v, x4r_v, 1 ), 0); + res_v = remainderf4(x5n_v, x5d_v); + TEST_CHECK("20040928191938EJL", allequal_ulps_float4( res_v, x5r_v, 1 ), 0); + + TEST_START("remainderf"); + res = remainderf(x0n, x0d); + TEST_CHECK("20040928191941EJL", ulpDiff_f( res, x0r ) <= 1, 0); + res = remainderf(x1n, x1d); + TEST_CHECK("20040928191942EJL", ulpDiff_f( res, x1r ) <= 1, 0); + res = remainderf(x2n, x2d); + TEST_CHECK("20040928191943EJL", ulpDiff_f( res, x2r ) <= 1, 0); + res = remainderf(x3n, x3d); + TEST_CHECK("20040928191948EJL", ulpDiff_f( res, x3r ) <= 1, 0); + res = remainderf(x4n, x4d); + TEST_CHECK("20040928191949EJL", ulpDiff_f( res, x4r ) <= 1, 0); + res = remainderf(x5n, x5d); + TEST_CHECK("20040928191950EJL", ulpDiff_f( res, x5r ) <= 1, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/remquod2.c b/Extras/simdmathlibrary/spu/tests/remquod2.c new file mode 100644 index 000000000..4060ed09e --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/remquod2.c @@ -0,0 +1,151 @@ +/* Test remquod2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * note: cannot calc too far numbers correctly + * ex. 
x=0xFFE0000000000000,y=0x0008000000000000 + */ + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int yyy[2]; + unsigned long long int quo[2]; + unsigned long long int a_res[2]; + unsigned long long int a_quo[2]; +} TestVec64_RemqD; + +int main() +{ + TestVec64_RemqD test_a[] = { + { + // normal 2.5/1.5 29/3 + {0x4004000000000000ULL,0x403d000000000000ULL}, + {0x3ff8000000000000ULL,0x4008000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0xbfe0000000000000ULL,0xbff0000000000000ULL}, + {0x0000000000000002ULL,0x0000000000000002ULL} + },{ + // normal + {0x09d0000000000006ULL,0x1000000000000000ULL}, + {0x8010000000000005ULL,0x0010000000000007ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0x800000000000007dULL,0x80037ffffffff1a5ULL}, + {0xFFFFFFFFFFFFFFFFULL,0x0000000000000003ULL} + },{ + // denorm + {0x0000000000000001ULL,0x800ffffffffffff3ULL}, + {0x8000000000000001ULL,0x8000000000000005ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000002ULL}, + {0xFFFFFFFFFFFFFFFFULL,0x0000000000000001ULL} + },{ + // divide by inf + {0xFFEFFFFFFFFFFFFFULL,0x0001000000000000ULL}, + {0x7FF0000000000000ULL,0x7FF0000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0xFFEFFFFFFFFFFFFFULL,0x0001000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + TestVec64_RemqD test_b[] = { + { + // divide by zero -> nan + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0x7ff8000000000000ULL,0x7ff8000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + 
{0x0000000000000000ULL,0x0000000000000000ULL}, + {0x7ff8000000000000ULL,0x7ff8000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL} + },{ + // border + {0xFFE0000000000000ULL,0x7FEFFFFFFFFFFFFFULL}, + {0x0008000000000000ULL,0x0010000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x0000000000000000ULL,0x0000000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060919210000NM","NM", "remquod2"); + + TEST_START("remquod2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + res_v = remquod2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_double2 *)&test_a[ii].yyy[0]), ((vec_llong2 *)&test_a[ii].quo[0])); + sprintf(msg,"2006091921%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].a_res[0])), 0); + sprintf(msg,"2006091922%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( *((vec_llong2 *)&test_a[ii].quo[0]), *((vec_llong2 *)&test_a[ii].a_quo[0])), 0); + } + for (ii=0; ; ii++) { + if ( (test_b[ii].xxx[0] == 0) && (test_b[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + res_v = remquod2 (*((vec_double2 *)&test_b[ii].xxx[0]), *((vec_double2 *)&test_b[ii].yyy[0]), ((vec_llong2 *)&test_b[ii].quo[0])); + sprintf(msg,"2006091923%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_b[ii].a_res[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/remquof4.c b/Extras/simdmathlibrary/spu/tests/remquof4.c new file mode 100644 index 000000000..065db3573 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/remquof4.c @@ -0,0 +1,167 @@ +/* Test remquof4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * copied test data from remainderf4 + * wrong quotient returns in scalar function + */ + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060912170027NM","NM", "remquof4"); + + unsigned int i0n = 0x449edbc6; + unsigned int i0d = 0x40cf799d; + unsigned int i0r = 0x3daa7300; + unsigned int i0q = 4; + unsigned int i1n = 0x6bca107a; + unsigned int i1d = 0x6c4a107a; + unsigned int i1r = 0x6bca107a; + unsigned int i1q = 0; + unsigned int i2n = 0x1c123605; + unsigned int i2d = 0x1c923602; + unsigned int i2r = 0x9c1235ff; + unsigned int i2q = 1; + unsigned int i3n = 0x2b4c50fa; + unsigned int i3d = 0x253a3ae3; + unsigned int i3r = 0xa41873a8; + unsigned int i3q = 6; + unsigned int i4n = 0x73addffc; + unsigned int i4d = 0x742ddffc; + unsigned int i4r = 0x73addffc; + unsigned int i4q = 0; + unsigned int i5n = 0x29d4d97c; + unsigned int i5d = 0x2a546e77; + unsigned int i5r = 0xa9d40372; + unsigned int i5q = 1; + + float x0n = hide_float(make_float(i0n)); + float x0d = hide_float(make_float(i0d)); + float x0r = hide_float(make_float(i0r)); + + float x1n = hide_float(make_float(i1n)); + float x1d = hide_float(make_float(i1d)); + float x1r = hide_float(make_float(i1r)); + + float x2n = hide_float(make_float(i2n)); + float x2d = hide_float(make_float(i2d)); + float x2r = hide_float(make_float(i2r)); + + float x3n = hide_float(make_float(i3n)); + float x3d = hide_float(make_float(i3d)); + float x3r = hide_float(make_float(i3r)); + + float x4n = hide_float(make_float(i4n)); + float x4d = hide_float(make_float(i4d)); + float x4r = hide_float(make_float(i4r)); + + float x5n = hide_float(make_float(i5n)); + float x5d = hide_float(make_float(i5d)); + float x5r = hide_float(make_float(i5r)); + + vec_float4 x0n_v = spu_splats(x0n); + vec_float4 x0d_v = spu_splats(x0d); + vec_float4 x0r_v = spu_splats(x0r); + + vec_float4 x1n_v = spu_splats(x1n); + vec_float4 x1d_v = spu_splats(x1d); + 
vec_float4 x1r_v = spu_splats(x1r); + + vec_float4 x2n_v = spu_splats(x2n); + vec_float4 x2d_v = spu_splats(x2d); + vec_float4 x2r_v = spu_splats(x2r); + + vec_float4 x3n_v = spu_splats(x3n); + vec_float4 x3d_v = spu_splats(x3d); + vec_float4 x3r_v = spu_splats(x3r); + + vec_float4 x4n_v = spu_splats(x4n); + vec_float4 x4d_v = spu_splats(x4d); + vec_float4 x4r_v = spu_splats(x4r); + + vec_float4 x5n_v = spu_splats(x5n); + vec_float4 x5d_v = spu_splats(x5d); + vec_float4 x5r_v = spu_splats(x5r); + + float res; + int quo; + vec_float4 res_v; + vec_int4 quo_v; + + TEST_START("remquof4"); + res_v = remquof4(x0n_v, x0d_v, &quo_v); + TEST_CHECK("20060912170031NM", allequal_ulps_float4( res_v, x0r_v, 1 ), 0); + TEST_CHECK("20060912170131NM", allequal_int4( quo_v, spu_splats((int)i0q) ), 0); + res_v = remquof4(x1n_v, x1d_v, &quo_v); + TEST_CHECK("20060912170033NM", allequal_ulps_float4( res_v, x1r_v, 1 ), 0); + TEST_CHECK("20060912170133NM", allequal_int4( quo_v, spu_splats((int)i1q) ), 0); + res_v = remquof4(x2n_v, x2d_v, &quo_v); + TEST_CHECK("20060912170034NM", allequal_ulps_float4( res_v, x2r_v, 1 ), 0); + TEST_CHECK("20060912170134NM", allequal_int4( quo_v, spu_splats((int)i2q) ), 0); + res_v = remquof4(x3n_v, x3d_v, &quo_v); + TEST_CHECK("20060912170036NM", allequal_ulps_float4( res_v, x3r_v, 1 ), 0); + TEST_CHECK("20060912170136NM", allequal_int4( quo_v, spu_splats((int)i3q) ), 0); + res_v = remquof4(x4n_v, x4d_v, &quo_v); + TEST_CHECK("20060912170037NM", allequal_ulps_float4( res_v, x4r_v, 1 ), 0); + TEST_CHECK("20060912170137NM", allequal_int4( quo_v, spu_splats((int)i4q) ), 0); + res_v = remquof4(x5n_v, x5d_v, &quo_v); + TEST_CHECK("20060912170038NM", allequal_ulps_float4( res_v, x5r_v, 1 ), 0); + TEST_CHECK("20060912170138NM", allequal_int4( quo_v, spu_splats((int)i5q) ), 0); + + TEST_START("remquof"); + res = remquof(x0n, x0d, &quo); + TEST_CHECK("20060912170041NM", ulpDiff_f( res, x0r ) <= 1, 0); + TEST_CHECK("20060912170141NM", (quo == (int)i0q), 0); + res = 
remquof(x1n, x1d, &quo); + TEST_CHECK("20060912170042NM", ulpDiff_f( res, x1r ) <= 1, 0); + TEST_CHECK("20060912170142NM", (quo == (int)i1q), 0); + res = remquof(x2n, x2d, &quo); + TEST_CHECK("20060912170043NM", ulpDiff_f( res, x2r ) <= 1, 0); + TEST_CHECK("20060912170143NM", (quo == (int)i2q), 0); + res = remquof(x3n, x3d, &quo); + TEST_CHECK("20060912170048NM", ulpDiff_f( res, x3r ) <= 1, 0); + TEST_CHECK("20060912170144NM", (quo == (int)i3q), 0); + res = remquof(x4n, x4d, &quo); + TEST_CHECK("20060912170049NM", ulpDiff_f( res, x4r ) <= 1, 0); + TEST_CHECK("20060912170149NM", (quo == (int)i4q), 0); + res = remquof(x5n, x5d, &quo); + TEST_CHECK("20060912170050NM", ulpDiff_f( res, x5r ) <= 1, 0); + TEST_CHECK("20060912170150NM", (quo == (int)i5q), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/rintd2.c b/Extras/simdmathlibrary/spu/tests/rintd2.c new file mode 100644 index 000000000..7eeb40b18 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/rintd2.c @@ -0,0 +1,180 @@ +/* Test rintd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ rintd2 - Round the input to the nearest integer according to + * the current rounding mode. + * + *@brief + * boundary test for rintd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. 
+ * + *@note + * add Denormalized handling + * changed over 0x4330000000000000(ABS) handling + * + * + **/ + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int ans0[2]; + unsigned long long int ans1[2]; + unsigned long long int ans2[2]; + unsigned long long int ans3[2]; +} TestVec64_NerI; + +int main() +{ + TestVec64_NerI test_a[] = { + { + // zero + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // border + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL}, + {0xc330000000000000ULL,0x4330000000000000ULL} + },{ + // MIN , MAX + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL} + },{ + // denotmalized + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x0000000000000000ULL}, + {0x8000000000000000ULL,0x3ff0000000000000ULL}, + {0xbff0000000000000ULL,0x0000000000000000ULL} + },{ + // denotmalized + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + 
{0x3ff0000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0xbff0000000000000ULL} + },{ + // 1.0 + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL} + },{ + // 1.5 + {0x3ff8000000000000ULL,0xbff8000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x4000000000000000ULL,0xbff0000000000000ULL}, + {0x3ff0000000000000ULL,0xc000000000000000ULL} + },{ + // 2.5 + {0x4004000000000000ULL,0xc004000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x4000000000000000ULL,0xc000000000000000ULL}, + {0x4008000000000000ULL,0xc000000000000000ULL}, + {0x4000000000000000ULL,0xc008000000000000ULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060831210000NM","NM", "rintd2"); + + TEST_START("rintd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + spu_mtfpscr(((vec_uint4){0x0000,0,0,0})); + res_v = rintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006083121%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans0[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0500,0,0,0})); + res_v = rintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006083121%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans1[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0a00,0,0,0})); + res_v = rintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006083121%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans2[0])), 0); + + 
spu_mtfpscr(((vec_uint4){0x0f00,0,0,0})); + res_v = rintd2 (*((vec_double2 *)&test_a[ii].xxx[0])); + sprintf(msg,"2006083121%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans3[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/rintf4.c b/Extras/simdmathlibrary/spu/tests/rintf4.c new file mode 100644 index 000000000..c845240fd --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/rintf4.c @@ -0,0 +1,94 @@ +/* Test rintf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#define DEFINE_DATA(var,a,b) \ + float var = hide_float(a); \ + float var ## _out = hide_float(b); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +/* + */ +#define DEFINE_DATA_UNSIGNED(var,a,b)\ + unsigned int var ## _ina = a ; \ + unsigned int var ## _inb = b ; \ + float var = make_float(var ## _ina); \ + float var ## _out = make_float(var ## _inb); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +#define DO_TEST(var,id) \ + res_v = rintf4(var ## _v); \ + TEST_CHECK(" #id ", allequal_float4( res_v, var ## _out_v ), 0); + + +int main() +{ + vec_float4 res_v; + + TEST_SET_START("184604792300","RNT", "rintf4"); + + + + + //s=0, e=100, f=7fffff --> s=0, e=100, f=7fffff + DEFINE_DATA_UNSIGNED(x1,0x71ffffff,0x71ffffff) + //s=0, e=22, f=0x7fffff --> s=0,e=22,f=0x7ffffe + DEFINE_DATA_UNSIGNED(x2, 0x4affffff,0x4afffffe) + //s=0, e=23, f=0 --> s=0,e=23,f=0 + DEFINE_DATA_UNSIGNED(x3, 0x4b000000,0x4b000000) + //s=0, e=-126, f=0 --> 0 + DEFINE_DATA_UNSIGNED(x4, 0x800000,0x0) + DEFINE_DATA(x5, 1.001f, 1.f) + DEFINE_DATA(x6, -.05f, 0.f) + DEFINE_DATA(x7, 0.9999f, 0.f) + DEFINE_DATA(x8, 0.4999f, 0.f) + + TEST_START("rintf4"); + DO_TEST(x1,184604792301RNT) + DO_TEST(x2,184604792302RNT) + DO_TEST(x3,184604792303RNT) + 
DO_TEST(x4,184604792304RNT) + DO_TEST(x5,184604792305RNT) + DO_TEST(x6,184604792306RNT) + DO_TEST(x7,184604792307RNT) + DO_TEST(x8,184604792308RNT) + TEST_SET_DONE(); + + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/roundd2.c b/Extras/simdmathlibrary/spu/tests/roundd2.c new file mode 100644 index 000000000..bdb780fe7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/roundd2.c @@ -0,0 +1,151 @@ +/* Test roundd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ roundd2 - Round the input to the nearest integer. + * + *@brief + * boundary test for nextafterd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * + * + **/ + + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef union { + struct { + double xxx[2]; + double ans[2]; + } dbl; + struct { + unsigned long long xxx[2]; + unsigned long long ans[2]; + } ull; +} TestVec_Roundd2; + +int main() +{ + TestVec_Roundd2 test_a[] = { + { + ull:{ + // 0 -> 0 , -0 -> -0 + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // -Inf -> -Inf , Inf -> Inf + {0xFFF0000000000000ULL,0x7FF0000000000000ULL}, + {0xFFF0000000000000ULL,0x7FF0000000000000ULL} + } + },{ + ull:{ + // MAX -> MAX , MIN -> MIN + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x0000000000000001ULL,0x8000000000000010ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x800FFFFFFFFFFFFFULL,0x000FFFFFFFFFFFFFULL}, + {0x8000000000000000ULL,0x0000000000000000ULL} + } + },{ + ull:{ + // border + {0x4320000000000001ULL,0xC320000000000001ULL}, + {0x4320000000000002ULL,0xC320000000000002ULL} + } + },{ + dbl:{ + {1.0, -1.0}, + {1.0, -1.0} + } + },{ + 
dbl:{ + {-2.5, 3.5}, + {-3.0, 4.0} + } + },{ + ull:{ + // Nan + {0xFFF0000000000001ULL,0x7FF0000000000001ULL}, + {0xFFF0000000000001ULL,0x7FF0000000000001ULL} + } + },{ + ull:{ + {0ULL,0ULL}, + {0ULL,0ULL} + } + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060831200000NM","NM", "roundd2"); + + TEST_START("roundd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].ull.xxx[0] == 0) && (test_a[ii].ull.xxx[1] == 0) ) break; + + res_v = roundd2 (*((vec_double2 *)&test_a[ii].dbl.xxx[0]) ); + sprintf(msg,"2006083120%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, (vec_llong2)*((vec_double2 *)&test_a[ii].dbl.ans[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/roundf4.c b/Extras/simdmathlibrary/spu/tests/roundf4.c new file mode 100644 index 000000000..9bbbcbc97 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/roundf4.c @@ -0,0 +1,100 @@ +/* Test roundf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +// a: float b:float +#define DEFINE_DATA(var,a,b) \ + float var = hide_float(a); \ + float var ## _out = hide_float(b); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +// a: bit pattern b: bit pattern +#define DEFINE_DATA_UNSIGNED(var,a,b) \ + unsigned int var ## _ina = a ; \ + unsigned int var ## _inb = b ; \ + float var = make_float (var ## _ina); \ + float var ## _out = make_float(var ## _inb); \ + vec_float4 var ## _v = spu_splats(var); \ + vec_float4 var ## _out_v = spu_splats(var ## _out); + +#define DO_TEST(var,id) \ + res_v = roundf4(var ## _v); \ + TEST_CHECK(" #id ", allequal_float4( res_v, var ## _out_v ), 0); + + +int main() +{ + vec_float4 res_v; + + TEST_SET_START("164260798500","RUD", "roundf4"); + + + + //s=0 + DEFINE_DATA(x1, 1.0, 1.0f) + DEFINE_DATA(x2, -1.0,-1.0f) + //s=-1 + DEFINE_DATA(x3, 0.5, 1.0f) + DEFINE_DATA(x4, -0.5, -1.0f) + //s=-2 + DEFINE_DATA(x5, 0.25, 0.0f) + //s=-3 + DEFINE_DATA(x6, 0.125, 0.0f) + //s=0, e=128, f=7fffff --> s=0, e=128, f=7fffff + DEFINE_DATA_UNSIGNED(x7,0x7fffffff,0x7fffffff) + //s=0, 
e=-126, f=0 --> 0 + DEFINE_DATA_UNSIGNED(x8, 0x800000,0x0) + DEFINE_DATA(x9, 0.4999, 0.f) + DEFINE_DATA(x10, 0.9999, 1.f) + + //TEST + TEST_START("roundf4"); + DO_TEST(x1,164260798501RUD) + DO_TEST(x2,164260798502RUD) + DO_TEST(x3,164260798503RUD) + DO_TEST(x4,164260798504RUD) + DO_TEST(x5,164260798505RUD) + DO_TEST(x6,164260798506RUD) + DO_TEST(x7,164260798507RUD) + DO_TEST(x8,164260798508RUD) + DO_TEST(x9,164260798509RUD) + DO_TEST(x10,164260798510RUD) + TEST_SET_DONE(); + + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/rsqrtd2.c b/Extras/simdmathlibrary/spu/tests/rsqrtd2.c new file mode 100644 index 000000000..63de28d65 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/rsqrtd2.c @@ -0,0 +1,126 @@ +/* Test rsqrtd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040928174038EJL","EJL", "rsqrtd2"); + + unsigned long long i6 = 0x7464fff515d76f87ull; + unsigned long long i6r = 0x25b3c03b72dba06cull; + unsigned long long i7 = 0x7606a4533cf5605eull; + unsigned long long i7r = 0x24e3056f4b45f6a9ull; + unsigned long long i8 = 0x4beae58c6f48733eull; + unsigned long long i8r = 0x39f173b787396c5full; + unsigned long long i9 = 0x3999ed5c8316b00bull; + unsigned long long i9r = 0x43192359a70ec761ull; + unsigned long long i10 = 0x68f7885c4b84b793ull; + unsigned long long i10r = 0x2b6a62d48c269d90ull; + unsigned long long i11 = 0x1aabc083c5c26227ull; + unsigned long long i11r = 0x52912e543817fabbull; + + double x0 = hide_double(-1.0/0.0); // -Inf -> NaN + double x1 = hide_double(1.0/0.0); // Inf -> +0 + double x2 = hide_double(0.0); // +0 -> Inf + double x3 = hide_double(-0.0); // -0 -> -Inf + double x4 = hide_double(0.0/0.0); // NaN -> NaN + double x5 = hide_double(4.0); + double x5r = hide_double(0.5); + double x6 = hide_double(make_double(i6)); + double x6r = hide_double(make_double(i6r)); + double x7 = hide_double(make_double(i7)); + double x7r = hide_double(make_double(i7r)); + double x8 = hide_double(make_double(i8)); + double x8r = hide_double(make_double(i8r)); + double x9 = hide_double(make_double(i9)); + double x9r = hide_double(make_double(i9r)); + double x10 = 
hide_double(make_double(i10)); + double x10r = hide_double(make_double(i10r)); + double x11 = hide_double(make_double(i11)); + double x11r = hide_double(make_double(i11r)); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 x1_v = spu_splats(x1); + vec_double2 x2_v = spu_splats(x2); + vec_double2 x3_v = spu_splats(x3); + vec_double2 x4_v = spu_splats(x4); + vec_double2 x5_v = spu_splats(x5); + vec_double2 x5r_v = spu_splats(x5r); + vec_double2 x6_v = spu_splats(x6); + vec_double2 x6r_v = spu_splats(x6r); + vec_double2 x7_v = spu_splats(x7); + vec_double2 x7r_v = spu_splats(x7r); + vec_double2 x8_v = spu_splats(x8); + vec_double2 x8r_v = spu_splats(x8r); + vec_double2 x9_v = spu_splats(x9); + vec_double2 x9r_v = spu_splats(x9r); + vec_double2 x10_v = spu_splats(x10); + vec_double2 x10r_v = spu_splats(x10r); + vec_double2 x11_v = spu_splats(x11); + vec_double2 x11r_v = spu_splats(x11r); + + vec_double2 res_v; + + TEST_START("rsqrtd2"); + res_v = rsqrtd2(x0_v); + TEST_CHECK("20040928174042EJL", allnan_double2( res_v ), 0); + res_v = rsqrtd2(x1_v); + TEST_CHECK("20040928174045EJL", allposzero_double2( res_v ), 0); + res_v = rsqrtd2(x2_v); + TEST_CHECK("20040928174047EJL", allposinf_double2( res_v ), 0); + res_v = rsqrtd2(x3_v); + TEST_CHECK("20040928174049EJL", allneginf_double2( res_v ), 0); + res_v = rsqrtd2(x4_v); + TEST_CHECK("20040928174054EJL", allnan_double2( res_v ), 0); + res_v = rsqrtd2(x5_v); + TEST_CHECK("20040928174058EJL", allequal_double2( res_v, x5r_v ), 0); + res_v = rsqrtd2(x6_v); + TEST_CHECK("20040928174101EJL", allequal_ulps_double2( res_v, x6r_v, 1 ), 0); + res_v = rsqrtd2(x7_v); + TEST_CHECK("20040928174104EJL", allequal_ulps_double2( res_v, x7r_v, 1 ), 0); + res_v = rsqrtd2(x8_v); + TEST_CHECK("20040928174106EJL", allequal_ulps_double2( res_v, x8r_v, 1 ), 0); + res_v = rsqrtd2(x9_v); + TEST_CHECK("20040928174108EJL", allequal_ulps_double2( res_v, x9r_v, 1 ), 0); + res_v = rsqrtd2(x10_v); + TEST_CHECK("20040928174110EJL", allequal_ulps_double2( 
res_v, x10r_v, 1 ), 0); + res_v = rsqrtd2(x11_v); + TEST_CHECK("20040928174113EJL", allequal_ulps_double2( res_v, x11r_v, 1 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/rsqrtf4.c b/Extras/simdmathlibrary/spu/tests/rsqrtf4.c new file mode 100644 index 000000000..898ad5ba0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/rsqrtf4.c @@ -0,0 +1,93 @@ +/* Test rsqrtf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20040928182349EJL","EJL", "rsqrtf4"); + + unsigned int i3 = 0x742c4455; + unsigned int i3r = 0x251c099a; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x243e5fe2; + unsigned int i5 = 0x4baa9e3c; + unsigned int i5r = 0x395dbbeb; + unsigned int i6 = 0x39344296; + unsigned int i6r = 0x429889eb; + unsigned int i7 = 0x68a586b0; + unsigned int i7r = 0x2ae11e67; + + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3r_v = spu_splats(x3r); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4r_v = spu_splats(x4r); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5r_v = spu_splats(x5r); + vec_float4 x6_v = spu_splats(x6); + vec_float4 x6r_v = spu_splats(x6r); + vec_float4 x7_v = spu_splats(x7); + vec_float4 x7r_v = spu_splats(x7r); + + vec_float4 res_v; + + TEST_START("rsqrtf4"); + res_v = rsqrtf4(x3_v); + TEST_CHECK("20040928182352EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = rsqrtf4(x4_v); + TEST_CHECK("20040928182355EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = rsqrtf4(x5_v); + TEST_CHECK("20040928182357EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = rsqrtf4(x6_v); + TEST_CHECK("20040928182358EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = rsqrtf4(x7_v); + TEST_CHECK("20040928182401EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/scalbllnd2.c 
b/Extras/simdmathlibrary/spu/tests/scalbllnd2.c new file mode 100644 index 000000000..e3484b371 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/scalbllnd2.c @@ -0,0 +1,251 @@ +/* Test scalbllnd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ scalbllnd2 - Multiply Double by 2 Raised to its Power + * For large elements of ex (overflow), returns HUGE_VALF + * For small elements of ex (underflow), returns 0. 
+ * + *@brief + * boundary test for scalbllnd2. + * + * + *@pre + * + *@criteria + * Run this program and check no error will be occurred. + * + *@note + * add Denormalized handling + * Round mode was passed because of spec. (underflow returns 0) + * + * + **/ + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +#undef SCALBLLND2_ROUND + + +typedef struct { + unsigned long long int xxx[2]; + unsigned long long int exp[2]; + unsigned long long int ans0[2]; + unsigned long long int ans1[2]; + unsigned long long int ans2[2]; + unsigned long long int ans3[2]; +} TestVec64_Ldexp; + +int main() +{ + TestVec64_Ldexp test_a[] = { + { + // zero + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000400ULL,0xFFFFFFFFFFFFFC00ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // MIN , MAX + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL}, + {0xFFEFFFFFFFFFFFFFULL,0x7FEFFFFFFFFFFFFFULL} + },{ + // Inf , -Inf + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL}, + {0x7FF0000000000000ULL,0xFFF0000000000000ULL} + },{ +#ifdef SCALBLLND2_ROUND + // denotmalized + {0x8000000000000003ULL,0x0000000000000003ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x8000000000000002ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000002ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000002ULL,0x0000000000000002ULL} + },{ + // 
denotmalized -54 + {0x0010000000000001ULL,0x8010000000000001ULL}, + {0xFFFFFFFFFFFFFFCAULL,0xFFFFFFFFFFFFFFCAULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFFFFFFF7CEULL,0xFFFFFFFFFFFFF7CEULL}, + {0x0000000000000001ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000001ULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFFFFFFF7CDULL,0xFFFFFFFFFFFFF7CDULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ +#else // SCALBLLND2_ROUND + // denotmalized + {0x8000000000000003ULL,0x0000000000000003ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL}, + {0x8000000000000001ULL,0x0000000000000001ULL} + },{ + +#endif // SCALBLLND2_ROUND + // denotmalized + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0008000000000000ULL,0x8008000000000000ULL} + },{ + // denotmalized + {0x0008000000000000ULL,0x8008000000000000ULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL}, + {0x0010000000000000ULL,0x8010000000000000ULL} + },{ + // 1.0 + 
{0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x00000000000003ffULL,0x00000000000003ffULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL}, + {0x7FE0000000000000ULL,0xFFE0000000000000ULL} + },{ + // 1.0 -> max + {0x3ff0000000000000ULL,0xbff0000000000000ULL}, + {0x0000000000000400ULL,0x0000000000000400ULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + },{ + // max -> ! + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0xFFFFFFFF00000000ULL,0xFFFFFFFF00000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + },{ + // min-> + {0x0000000000000001ULL,0x8000000000000001ULL}, + {0x0FFFFFFFFFFFFFFFULL,0x0FFFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + },{ + // NaN , -NaN + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x0000000000000001ULL,0x0000000000000001ULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL}, + {0x7FFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL} + },{ + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL}, + {0ULL,0ULL} + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060907180000NM","NM", "scalbllnd2"); + + TEST_START("scalbllnd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].xxx[0] == 0) && (test_a[ii].xxx[1] == 0) ) break; + + // set Floating point round mode + spu_mtfpscr(((vec_uint4){0x0100,0,0,0})); + res_v = scalbllnd2 
(*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090718%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans0[0])), 0); + +#ifdef SCALBLLND2_ROUND + + spu_mtfpscr(((vec_uint4){0x0600,0,0,0})); + res_v = scalbllnd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090718%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans1[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0b00,0,0,0})); + res_v = scalbllnd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090718%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans2[0])), 0); + + spu_mtfpscr(((vec_uint4){0x0c00,0,0,0})); + res_v = scalbllnd2 (*((vec_double2 *)&test_a[ii].xxx[0]), *((vec_llong2 *)&test_a[ii].exp[0])); + sprintf(msg,"2006090718%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, *((vec_llong2 *)&test_a[ii].ans3[0])), 0); +#endif // SCALBLLND2_ROUND + + } + + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/scalbnf4.c b/Extras/simdmathlibrary/spu/tests/scalbnf4.c new file mode 100644 index 000000000..3961ebdf2 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/scalbnf4.c @@ -0,0 +1,117 @@ +/* Test nextafterf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef union { + struct { + float xxx[4]; + int exp[4]; + float ans[4]; + } flt; + struct { + unsigned int xxx[4]; + unsigned int exp[4]; + unsigned int ans[4]; + } ui; +} TestVec_ScalF4; + + +int main() +{ + TestVec_ScalF4 test_a[] = { + { + ui:{ + // 0 -> 0 , -0 -> -0 + {0x00000000,0x80000000,0x80000000,0x00000000}, + {0x000000FF,0x00000001,0xFFFFFFFF,0xFFFFFF00}, + {0x00000000,0x80000000,0x80000000,0x00000000} + } + },{ + ui:{ + // Inf + {0xFF800000,0x7F800000,0x7F800000,0xFF800000}, + {0x000000FF,0x00000001,0xFFFFFFFF,0xFFFFFF00}, + {0xFFFFFFFF,0x7FFFFFFF,0x7F000000,0x80000000} + } + },{ + ui:{ + // MAX MIN + {0x7F7FFFFF,0xFF7FFFFF,0x7F7FFFFF,0xFF7FFFFF}, + {0x00000001,0x00000001,0xFFFFFFFF,0xFFFFFF00}, + {0x7FFFFFFF,0xFFFFFFFF,0x7EFFFFFF,0x00000000} + } + },{ + flt:{ + {-1.0, 1.0, -1.0, 1.0}, + { 1, 1, -1, -1}, + {-2.0, 2.0, -0.5, 0.5} + } + },{ + ui:{ + // + {0x80ffffff,0x80ffffff,0x00ffffff,0x00ffffff}, + {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}, + {0x80000000,0x80000000,0x00000000,0x00000000} + } + },{ + ui:{ + {0,0,0,0}, + {0,0,0,0}, + {0,0,0,0} + } + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_float4 res_v; + + TEST_SET_START("20060907150000NM","NM", "scalbnf4"); + + TEST_START("scalbnf4"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].ui.xxx[0] == 0) && (test_a[ii].ui.xxx[1] == 0) ) break; + + res_v = scalbnf4 (*((vec_float4 *)&test_a[ii].flt.xxx[0]), *((vec_int4 *)&test_a[ii].flt.exp[0]) ); + sprintf(msg,"2006090715%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_float4( res_v, *((vec_float4 *)&test_a[ii].flt.ans[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/signbitd2.c b/Extras/simdmathlibrary/spu/tests/signbitd2.c new file mode 100644 index 000000000..e4c94cb9e --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/signbitd2.c @@ -0,0 +1,195 @@ +/* Test signbitd2 
for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060829000000AAN","AAN", "signbitd2"); + + //-Nan + double x0 = hide_double(-nan("")); + unsigned long long r0 = 0xffffffffffffffffull; + + //-Inf + double x1 = hide_double(-1.0/0.0); + unsigned long long r1 = 0xffffffffffffffffull; + + //-Smax + double x2 = hide_double(-DBL_MAX); + unsigned long long r2 = 0xffffffffffffffffull; + + //-Norm + double x3 = hide_double(-168.97345223013); + unsigned long long r3 = 0xffffffffffffffffull; + + //-Denorm + double x4 = hide_double(make_double(0x803AAAAAAAAAAAAAull)); + unsigned long long r4 = 0xffffffffffffffffull; + + //-Smin + double x5 = hide_double(-DBL_MIN); + unsigned long long r5 = 0xffffffffffffffffull; + + //-Unf + double x6 = hide_double(-1.0e-999); + unsigned long long r6 = 0xffffffffffffffffull; + + // -0 + double x7 = hide_double(-0.0); + unsigned long long r7 = 0xffffffffffffffffull; + + // 0 + double x8 = hide_double( 0.0); + unsigned long long r8 = 0x0000000000000000ull; + + //+Unf + double x9 = hide_double( 1.0e-999); + unsigned long long r9 = 0x0000000000000000ull; + + //+Smin + double x10 = hide_double( DBL_MIN); + unsigned long long r10 = 0x0000000000000000ull; + + //+Denorm + double x11 = hide_double(make_double(0x007AAAAAAAAAAAAAull)); + unsigned long long r11 = 0x0000000000000000ull; + + //+Norm + double x12 = hide_double( 672953.74593); + unsigned long long r12 = 0x0000000000000000ull; + + //+Smax + double x13 = hide_double( DBL_MAX); + unsigned long long r13 = 0x0000000000000000ull; + + //+Inf + double x14 = hide_double( 1.0/0.0); + unsigned long long r14 = 0x0000000000000000ull; + + //+NaN + double x15 = hide_double( nan("")); + unsigned long long r15 = 0x0000000000000000ull; + + vec_double2 x0_v = spu_splats(x0); + vec_ullong2 r0_v = spu_splats(r0); + + vec_double2 x1_v = spu_splats(x1); + vec_ullong2 r1_v = spu_splats(r1); + + vec_double2 
x2_v = spu_splats(x2); + vec_ullong2 r2_v = spu_splats(r2); + + vec_double2 x3_v = spu_splats(x3); + vec_ullong2 r3_v = spu_splats(r3); + + vec_double2 x4_v = spu_splats(x4); + vec_ullong2 r4_v = spu_splats(r4); + + vec_double2 x5_v = spu_splats(x5); + vec_ullong2 r5_v = spu_splats(r5); + + vec_double2 x6_v = spu_splats(x6); + vec_ullong2 r6_v = spu_splats(r6); + + vec_double2 x7_v = spu_splats(x7); + vec_ullong2 r7_v = spu_splats(r7); + + vec_double2 x8_v = spu_splats(x8); + vec_ullong2 r8_v = spu_splats(r8); + + vec_double2 x9_v = spu_splats(x9); + vec_ullong2 r9_v = spu_splats(r9); + + vec_double2 x10_v = spu_splats(x10); + vec_ullong2 r10_v = spu_splats(r10); + + vec_double2 x11_v = spu_splats(x11); + vec_ullong2 r11_v = spu_splats(r11); + + vec_double2 x12_v = spu_splats(x12); + vec_ullong2 r12_v = spu_splats(r12); + + vec_double2 x13_v = spu_splats(x13); + vec_ullong2 r13_v = spu_splats(r13); + + vec_double2 x14_v = spu_splats(x14); + vec_ullong2 r14_v = spu_splats(r14); + + vec_double2 x15_v = spu_splats(x15); + vec_ullong2 r15_v = spu_splats(r15); + + vec_ullong2 res_v; + + TEST_START("signbitd2"); + + res_v = (vec_ullong2)signbitd2(x0_v); + TEST_CHECK("20060829000000AAN", allequal_ullong2( res_v, r0_v ), 0); + res_v = (vec_ullong2)signbitd2(x1_v); + TEST_CHECK("20060829000001AAN", allequal_ullong2( res_v, r1_v ), 0); + res_v = (vec_ullong2)signbitd2(x2_v); + TEST_CHECK("20060829000002AAN", allequal_ullong2( res_v, r2_v ), 0); + res_v = (vec_ullong2)signbitd2(x3_v); + TEST_CHECK("20060829000003AAN", allequal_ullong2( res_v, r3_v ), 0); + res_v = (vec_ullong2)signbitd2(x4_v); + TEST_CHECK("20060829000004AAN", allequal_ullong2( res_v, r4_v ), 0); + res_v = (vec_ullong2)signbitd2(x5_v); + TEST_CHECK("20060829000005AAN", allequal_ullong2( res_v, r5_v ), 0); + res_v = (vec_ullong2)signbitd2(x6_v); + TEST_CHECK("20060829000006AAN", allequal_ullong2( res_v, r6_v ), 0); + res_v = (vec_ullong2)signbitd2(x7_v); + TEST_CHECK("20060829000007AAN", allequal_ullong2( 
res_v, r7_v ), 0); + res_v = (vec_ullong2)signbitd2(x8_v); + TEST_CHECK("20060829000008AAN", allequal_ullong2( res_v, r8_v ), 0); + res_v = (vec_ullong2)signbitd2(x9_v); + TEST_CHECK("20060829000009AAN", allequal_ullong2( res_v, r9_v ), 0); + res_v = (vec_ullong2)signbitd2(x10_v); + TEST_CHECK("20060829000010AAN", allequal_ullong2( res_v, r10_v ), 0); + res_v = (vec_ullong2)signbitd2(x11_v); + TEST_CHECK("20060829000011AAN", allequal_ullong2( res_v, r11_v ), 0); + res_v = (vec_ullong2)signbitd2(x12_v); + TEST_CHECK("20060829000012AAN", allequal_ullong2( res_v, r12_v ), 0); + res_v = (vec_ullong2)signbitd2(x13_v); + TEST_CHECK("20060829000013AAN", allequal_ullong2( res_v, r13_v ), 0); + res_v = (vec_ullong2)signbitd2(x14_v); + TEST_CHECK("20060829000014AAN", allequal_ullong2( res_v, r14_v ), 0); + res_v = (vec_ullong2)signbitd2(x15_v); + TEST_CHECK("20060829000015AAN", allequal_ullong2( res_v, r15_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/signbitf4.c b/Extras/simdmathlibrary/spu/tests/signbitf4.c new file mode 100644 index 000000000..d30ff7340 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/signbitf4.c @@ -0,0 +1,195 @@ +/* Test signbitf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20060829000000AAN","AAN", "signbitf4"); + + //-Nan + float x0 = hide_float(-nan("")); + unsigned int r0 = 0xffffffff; + + //-Inf + float x1 = hide_float(-1.0/0.0); + unsigned int r1 = 0xffffffff; + + //-Smax + float x2 = hide_float(-FLT_MAX); + unsigned int r2 = 0xffffffff; + + //-Norm + float x3 = hide_float(-168.97345223013f); + unsigned int r3 = 0xffffffff; + + //-Denorm + float x4 = hide_float(make_float(0x803AAAAA)); + unsigned int r4 = 0xffffffff; + + //-Smin + float x5 = hide_float(-FLT_MIN); + unsigned int r5 = 0xffffffff; + + //-Unf + float x6 = hide_float(-1.0e-999); + unsigned int r6 = 0xffffffff; + + // -0 + float x7 = hide_float(-0.0f); + unsigned int r7 = 0xffffffff; + + // 0 + float x8 = hide_float( 0.0f); + unsigned int r8 = 0x00000000; + + //+Unf + float x9 = hide_float( 1.0e-999); + unsigned int r9 = 0x00000000; + + 
//+Smin + float x10 = hide_float( FLT_MIN); + unsigned int r10 = 0x00000000; + + //+Denorm + float x11 = hide_float(make_float(0x007AAAAA)); + unsigned int r11 = 0x00000000; + + //+Norm + float x12 = hide_float( 672953.74593f); + unsigned int r12 = 0x00000000; + + //+Smax + float x13 = hide_float( FLT_MAX); + unsigned int r13 = 0x00000000; + + //+Inf + float x14 = hide_float( 1.0/0.0); + unsigned int r14 = 0x00000000; + + //+NaN + float x15 = hide_float( nan("")); + unsigned int r15 = 0x00000000; + + vec_float4 x0_v = spu_splats(x0); + vec_uint4 r0_v = spu_splats(r0); + + vec_float4 x1_v = spu_splats(x1); + vec_uint4 r1_v = spu_splats(r1); + + vec_float4 x2_v = spu_splats(x2); + vec_uint4 r2_v = spu_splats(r2); + + vec_float4 x3_v = spu_splats(x3); + vec_uint4 r3_v = spu_splats(r3); + + vec_float4 x4_v = spu_splats(x4); + vec_uint4 r4_v = spu_splats(r4); + + vec_float4 x5_v = spu_splats(x5); + vec_uint4 r5_v = spu_splats(r5); + + vec_float4 x6_v = spu_splats(x6); + vec_uint4 r6_v = spu_splats(r6); + + vec_float4 x7_v = spu_splats(x7); + vec_uint4 r7_v = spu_splats(r7); + + vec_float4 x8_v = spu_splats(x8); + vec_uint4 r8_v = spu_splats(r8); + + vec_float4 x9_v = spu_splats(x9); + vec_uint4 r9_v = spu_splats(r9); + + vec_float4 x10_v = spu_splats(x10); + vec_uint4 r10_v = spu_splats(r10); + + vec_float4 x11_v = spu_splats(x11); + vec_uint4 r11_v = spu_splats(r11); + + vec_float4 x12_v = spu_splats(x12); + vec_uint4 r12_v = spu_splats(r12); + + vec_float4 x13_v = spu_splats(x13); + vec_uint4 r13_v = spu_splats(r13); + + vec_float4 x14_v = spu_splats(x14); + vec_uint4 r14_v = spu_splats(r14); + + vec_float4 x15_v = spu_splats(x15); + vec_uint4 r15_v = spu_splats(r15); + + vec_uint4 res_v; + + TEST_START("signbitf4"); + + res_v = (vec_uint4)signbitf4(x0_v); + TEST_CHECK("20060829000000AAN", allequal_uint4( res_v, r0_v ), 0); + res_v = (vec_uint4)signbitf4(x1_v); + TEST_CHECK("20060829000001AAN", allequal_uint4( res_v, r1_v ), 0); + res_v = (vec_uint4)signbitf4(x2_v); + 
TEST_CHECK("20060829000002AAN", allequal_uint4( res_v, r2_v ), 0); + res_v = (vec_uint4)signbitf4(x3_v); + TEST_CHECK("20060829000003AAN", allequal_uint4( res_v, r3_v ), 0); + res_v = (vec_uint4)signbitf4(x4_v); + TEST_CHECK("20060829000004AAN", allequal_uint4( res_v, r4_v ), 0); + res_v = (vec_uint4)signbitf4(x5_v); + TEST_CHECK("20060829000005AAN", allequal_uint4( res_v, r5_v ), 0); + res_v = (vec_uint4)signbitf4(x6_v); + TEST_CHECK("20060829000006AAN", allequal_uint4( res_v, r6_v ), 0); + res_v = (vec_uint4)signbitf4(x7_v); + TEST_CHECK("20060829000007AAN", allequal_uint4( res_v, r7_v ), 0); + res_v = (vec_uint4)signbitf4(x8_v); + TEST_CHECK("20060829000008AAN", allequal_uint4( res_v, r8_v ), 0); + res_v = (vec_uint4)signbitf4(x9_v); + TEST_CHECK("20060829000009AAN", allequal_uint4( res_v, r9_v ), 0); + res_v = (vec_uint4)signbitf4(x10_v); + TEST_CHECK("20060829000010AAN", allequal_uint4( res_v, r10_v ), 0); + res_v = (vec_uint4)signbitf4(x11_v); + TEST_CHECK("20060829000011AAN", allequal_uint4( res_v, r11_v ), 0); + res_v = (vec_uint4)signbitf4(x12_v); + TEST_CHECK("20060829000012AAN", allequal_uint4( res_v, r12_v ), 0); + res_v = (vec_uint4)signbitf4(x13_v); + TEST_CHECK("20060829000013AAN", allequal_uint4( res_v, r13_v ), 0); + res_v = (vec_uint4)signbitf4(x14_v); + TEST_CHECK("20060829000014AAN", allequal_uint4( res_v, r14_v ), 0); + res_v = (vec_uint4)signbitf4(x15_v); + TEST_CHECK("20060829000015AAN", allequal_uint4( res_v, r15_v ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/sqrtd2.c b/Extras/simdmathlibrary/spu/tests/sqrtd2.c new file mode 100644 index 000000000..d83753052 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/sqrtd2.c @@ -0,0 +1,128 @@ +/* Test sqrtd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + + +int main() +{ + TEST_SET_START("20040928181417EJL","EJL", "sqrtd2"); + + unsigned long long i6 = 0x7464fff515d76f87ull; + unsigned long long i6r = 0x5a29ec408d8da268ull; + unsigned long long i7 = 0x7606a4533cf5605eull; + unsigned long long i7r = 0x5afaead3b0ed2de5ull; + unsigned long long i8 = 0x4beae58c6f48733eull; + unsigned long long i8r = 0x45ed566b9c6464d6ull; + unsigned long long i9 = 0x3999ed5c8316b00bull; + unsigned long long i9r = 0x3cc45e14871d1b1full; + unsigned long long i10 = 0x68f7885c4b84b793ull; + unsigned long long i10r = 0x54736778e6778980ull; + unsigned long long i11 = 0x1aabc083c5c26227ull; + unsigned long long i11r = 0x2d4dcce790f64a35ull; + + double x0 = hide_double(-1.0/0.0); // -Inf -> NaN + double x1 = hide_double(1.0/0.0); // Inf -> Inf + double x2 = hide_double(0.0); // +0 -> +0 + double x3 = hide_double(-0.0); // -0 -> -0 + double x4 = hide_double(0.0/0.0); // NaN -> NaN + double x5 = hide_double(4.0); + double x5r = hide_double(2.0); + double x6 = hide_double(make_double(i6)); + double x6r = hide_double(make_double(i6r)); + double x7 = hide_double(make_double(i7)); + double x7r = hide_double(make_double(i7r)); + double x8 = hide_double(make_double(i8)); + double x8r = hide_double(make_double(i8r)); + double x9 = hide_double(make_double(i9)); + double x9r = hide_double(make_double(i9r)); + double x10 = hide_double(make_double(i10)); + double x10r = hide_double(make_double(i10r)); + double x11 = hide_double(make_double(i11)); + double x11r = hide_double(make_double(i11r)); + + vec_double2 x0_v = spu_splats(x0); + vec_double2 x1_v = spu_splats(x1); + vec_double2 x2_v = spu_splats(x2); + vec_double2 x3_v = spu_splats(x3); + vec_double2 x4_v = spu_splats(x4); + vec_double2 x5_v = spu_splats(x5); + vec_double2 x5r_v = spu_splats(x5r); + vec_double2 x6_v = spu_splats(x6); + vec_double2 x6r_v = spu_splats(x6r); + vec_double2 
x7_v = spu_splats(x7); + vec_double2 x7r_v = spu_splats(x7r); + vec_double2 x8_v = spu_splats(x8); + vec_double2 x8r_v = spu_splats(x8r); + vec_double2 x9_v = spu_splats(x9); + vec_double2 x9r_v = spu_splats(x9r); + vec_double2 x10_v = spu_splats(x10); + vec_double2 x10r_v = spu_splats(x10r); + vec_double2 x11_v = spu_splats(x11); + vec_double2 x11r_v = spu_splats(x11r); + + vec_double2 res_v; + + TEST_START("sqrtd2"); + res_v = sqrtd2(x0_v); + TEST_CHECK("20040928181422EJL", allnan_double2( res_v ), 0); + res_v = sqrtd2(x1_v); + TEST_CHECK("20040928181424EJL", allposinf_double2( res_v ), 0); + res_v = sqrtd2(x2_v); + TEST_CHECK("20040928181426EJL", allposzero_double2( res_v ), 0); + res_v = sqrtd2(x3_v); + TEST_CHECK("20040928181430EJL", allnegzero_double2( res_v ), 0); + res_v = sqrtd2(x4_v); + TEST_CHECK("20040928181432EJL", allnan_double2( res_v ), 0); + res_v = sqrtd2(x5_v); + TEST_CHECK("20040928181434EJL", allequal_double2( res_v, x5r_v ), 0); + res_v = sqrtd2(x6_v); + TEST_CHECK("20040928181436EJL", allequal_ulps_double2( res_v, x6r_v, 1 ), 0); + res_v = sqrtd2(x7_v); + TEST_CHECK("20040928181438EJL", allequal_ulps_double2( res_v, x7r_v, 1 ), 0); + res_v = sqrtd2(x8_v); + TEST_CHECK("20040928181440EJL", allequal_ulps_double2( res_v, x8r_v, 1 ), 0); + res_v = sqrtd2(x9_v); + TEST_CHECK("20040928181442EJL", allequal_ulps_double2( res_v, x9r_v, 1 ), 0); + res_v = sqrtd2(x10_v); + TEST_CHECK("20040928181444EJL", allequal_ulps_double2( res_v, x10r_v, 1 ), 0); + res_v = sqrtd2(x11_v); + TEST_CHECK("20040928181446EJL", allequal_ulps_double2( res_v, x11r_v, 1 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/sqrtf4.c b/Extras/simdmathlibrary/spu/tests/sqrtf4.c new file mode 100644 index 000000000..736f8120e --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/sqrtf4.c @@ -0,0 +1,92 @@ +/* Test sqrtf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +int main() +{ + TEST_SET_START("20040928182549EJL","EJL", "sqrtf4"); + + unsigned int i3 = 0x742c4455; + unsigned int i3r = 0x59d20034; + unsigned int i4 = 0x75e7753f; + unsigned int i4r = 0x5aac1fb5; + unsigned int i5 = 0x4baa9e3c; + unsigned int i5r = 0x4593c7d8; + unsigned int i6 = 0x39344296; + unsigned int i6r = 0x3c56d14c; + unsigned int i7 = 0x68a586b0; + unsigned int i7r = 0x54118f09; + + float x3 = hide_float(make_float(i3)); + float x3r = hide_float(make_float(i3r)); + float x4 = hide_float(make_float(i4)); + float x4r = hide_float(make_float(i4r)); + float x5 = hide_float(make_float(i5)); + float x5r = hide_float(make_float(i5r)); + float x6 = hide_float(make_float(i6)); + float x6r = hide_float(make_float(i6r)); + float x7 = hide_float(make_float(i7)); + float x7r = hide_float(make_float(i7r)); + + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3r_v = spu_splats(x3r); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4r_v = spu_splats(x4r); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5r_v = spu_splats(x5r); + vec_float4 x6_v = spu_splats(x6); + vec_float4 x6r_v = spu_splats(x6r); + vec_float4 x7_v = spu_splats(x7); + vec_float4 x7r_v = spu_splats(x7r); + + vec_float4 res_v; + + TEST_START("sqrtf4"); + res_v = sqrtf4(x3_v); + TEST_CHECK("20040928182552EJL", allequal_ulps_float4( res_v, x3r_v, 2 ), 0); + res_v = sqrtf4(x4_v); + TEST_CHECK("20040928182554EJL", allequal_ulps_float4( res_v, x4r_v, 2 ), 0); + res_v = sqrtf4(x5_v); + TEST_CHECK("20040928182556EJL", allequal_ulps_float4( res_v, x5r_v, 2 ), 0); + res_v = sqrtf4(x6_v); + TEST_CHECK("20040928182557EJL", allequal_ulps_float4( res_v, x6r_v, 2 ), 0); + res_v = sqrtf4(x7_v); + TEST_CHECK("20040928182559EJL", allequal_ulps_float4( res_v, x7r_v, 2 ), 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/tests/testutils.c 
b/Extras/simdmathlibrary/spu/tests/testutils.c new file mode 100644 index 000000000..350110f03 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/testutils.c @@ -0,0 +1,321 @@ +/* Common part of testsuite for SPU SIMD Math library + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include "testutils.h" + +typedef union { + unsigned int ui; + float f; +} conv4_t; + +typedef union { + unsigned long long ull; + double df; +} conv8_t; + +unsigned int +hide_uint( unsigned int x ) +{ + return x; +} + +int +hide_int( int x ) +{ + return x; +} + +float +hide_float( float x ) +{ + return x; +} + +double +hide_double( double x ) +{ + return x; +} + +float +make_float( unsigned int x ) +{ + conv4_t val; + val.ui = x; + return val.f; +} + +unsigned int +make_uint( float x ) +{ + conv4_t val; + val.f = x; + return val.ui; +} + +double +make_double( unsigned long long x ) +{ + conv8_t val; + val.ull = x; + return val.df; +} + +unsigned long long +make_ulonglong( double x ) +{ + conv8_t val; + val.df = x; + return val.ull; +} + +vec_uint4 bitDiff_f4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = spu_sub(refi, valsi); + vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); + + return spu_sub((vec_uint4)spu_splats(32), spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)))); +} + +unsigned int bitDiff_f(float ref, float val) { + return spu_extract(bitDiff_f4(spu_promote(ref,0), spu_promote(val,0)), 0); +} + +vec_ullong2 bitDiff_d2(vec_double2 ref, vec_double2 vals) { + double ref0, ref1, vals0, vals1; + long long refi0, refi1, valsi0, valsi1, diff0, diff1; + vec_ullong2 bits; + + ref0 = spu_extract(ref,0); + ref1 = spu_extract(ref,1); + vals0 = spu_extract(vals,0); + vals1 = spu_extract(vals,1); + + refi0 = make_ulonglong(ref0); + refi1 = make_ulonglong(ref1); + valsi0 = make_ulonglong(vals0); + valsi1 = make_ulonglong(vals1); + + diff0 = refi0 - valsi0; + diff1 = refi1 - valsi1; + + if ( diff0 < 0 ) + { + diff0 = valsi0 - refi0; + } + + if ( diff1 < 0 ) + { + diff1 = valsi1 - refi1; + } + + bits = spu_promote( (unsigned long long)ceil(log2((double)diff0)), 0 ); + bits = spu_insert( (unsigned long long)ceil(log2((double)diff1)), 
bits, 1 ); + + return bits; +} + +unsigned long long bitDiff_d(double ref, double val) { + return spu_extract(bitDiff_d2(spu_promote(ref,0), spu_promote(val,0)), 0); +} + +vec_uint4 ulpDiff_f4(vec_float4 ref, vec_float4 vals) { + vec_int4 refi = (vec_int4)ref; + vec_int4 valsi = (vec_int4)vals; + vec_int4 diff = spu_sub(refi, valsi); + vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); + + return (vec_uint4)(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0))); +} + +unsigned int ulpDiff_f(float ref, float val) { + return spu_extract(ulpDiff_f4(spu_promote(ref,0), spu_promote(val,0)), 0); +} + +vec_ullong2 ulpDiff_d2(vec_double2 ref, vec_double2 vals) { + double ref0, ref1, vals0, vals1; + long long refi0, refi1, valsi0, valsi1, diff0, diff1; + vec_ullong2 ulps; + + ref0 = spu_extract(ref,0); + ref1 = spu_extract(ref,1); + vals0 = spu_extract(vals,0); + vals1 = spu_extract(vals,1); + + refi0 = make_ulonglong(ref0); + refi1 = make_ulonglong(ref1); + valsi0 = make_ulonglong(vals0); + valsi1 = make_ulonglong(vals1); + + diff0 = refi0 - valsi0; + diff1 = refi1 - valsi1; + + if ( diff0 < 0 ) + { + diff0 = valsi0 - refi0; + } + + if ( diff1 < 0 ) + { + diff1 = valsi1 - refi1; + } + + ulps = spu_promote( (unsigned long long)diff0, 0 ); + ulps = spu_insert( (unsigned long long)diff1, ulps, 1 ); + + return ulps; +} + +unsigned long long ulpDiff_d(double ref, double val) { + return spu_extract(ulpDiff_d2(spu_promote(ref,0), spu_promote(val,0)), 0); +} + +vec_ullong2 cmpposzerod2( vec_double2 x ) +{ + vec_ullong2 cmp; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + + cmp = (vec_ullong2)spu_cmpeq( (vec_int4)x, spu_splats(0) ); + cmp = spu_and( spu_shuffle( cmp, cmp, even ), spu_shuffle( cmp, cmp, odd ) ); + + return cmp; +} + +vec_ullong2 cmpnegzerod2( vec_double2 x ) +{ + vec_ullong2 cmp; + vec_uchar16 even = 
(vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + + cmp = (vec_ullong2)spu_cmpeq( (vec_int4)x, (vec_int4)spu_splats(0x8000000000000000ull) ); + cmp = spu_and( spu_shuffle( cmp, cmp, even ), spu_shuffle( cmp, cmp, odd ) ); + + return cmp; +} + +int allequal_int4( vec_int4 x, vec_int4 y ) +{ + return ( spu_extract( spu_gather( spu_cmpeq( x, y ) ), 0 ) == 0xf ); +} +int allequal_llong2( vec_llong2 x, vec_llong2 y ) +{ + return spu_extract( spu_gather( spu_cmpeq ((vec_int4)(x - y), spu_splats((int)0) )), 0) == 0xF; +} + +int allequal_float4( vec_float4 x, vec_float4 y ) +{ + return ( spu_extract( spu_gather( (vec_uint4)spu_cmpeq( x, y ) ), 0 ) == 0xf ); +} + +int allequal_double2( vec_double2 x, vec_double2 y ) +{ + return ( spu_extract(x,0) == spu_extract(y,0) && spu_extract(x,1) == spu_extract(y,1) ); +} + +int allequal_llroundf4( llroundf4_t x, llroundf4_t y ) +{ + return ( spu_extract(x.vll[0],0) == spu_extract(y.vll[0],0) && + spu_extract(x.vll[0],1) == spu_extract(y.vll[0],1) && + spu_extract(x.vll[1],0) == spu_extract(y.vll[1],0) && + spu_extract(x.vll[1],1) == spu_extract(y.vll[1],1) ); +} + +int allequal_ulps_float4( vec_float4 x, vec_float4 y, int tolerance ) +{ + vec_uint4 ulps = ulpDiff_f4( x, y ); + return ( (int)spu_extract(ulps,0) <= tolerance && + (int)spu_extract(ulps,1) <= tolerance && + (int)spu_extract(ulps,2) <= tolerance && + (int)spu_extract(ulps,3) <= tolerance ); +} + +int allequal_ulps_double2( vec_double2 x, vec_double2 y, int tolerance ) +{ + vec_ullong2 ulps = ulpDiff_d2( x, y ); + return ( (int)spu_extract(ulps,0) <= tolerance && (int)spu_extract(ulps,1) <= tolerance ); +} + +int allequal_bits_float4( vec_float4 x, vec_float4 y, int tolerance ) +{ + vec_uint4 bits = bitDiff_f4( x, y ); + return ( (int)spu_extract(bits,0) <= tolerance && + (int)spu_extract(bits,1) <= tolerance && + (int)spu_extract(bits,2) <= tolerance 
&& + (int)spu_extract(bits,3) <= tolerance ); +} + +int allequal_bits_double2( vec_double2 x, vec_double2 y, int tolerance ) +{ + vec_ullong2 bits = bitDiff_d2( x, y ); + return ( (int)spu_extract(bits,0) <= tolerance && (int)spu_extract(bits,1) <= tolerance ); +} + +int allposinf_double2( vec_double2 x ) +{ + vec_ullong2 posinf = spu_andc( isinfd2 ( x ), signbitd2 ( x ) ); + return ( spu_extract(posinf,0) != 0 && spu_extract(posinf,1) != 0 ); +} + +int allneginf_double2( vec_double2 x ) +{ + vec_ullong2 neginf = spu_and( isinfd2 ( x ), signbitd2 ( x ) ); + return ( spu_extract(neginf,0) != 0 && spu_extract(neginf,1) != 0 ); +} + +int allzerodenorm_double2( vec_double2 x ) +{ + vec_ullong2 zero = is0denormd2 ( x ); + return ( spu_extract(zero,0) != 0 && spu_extract(zero,1) != 0 ); +} + +int allposzero_double2( vec_double2 x ) +{ + vec_ullong2 poszero = cmpposzerod2( x ); + return ( spu_extract(poszero,0) != 0 && spu_extract(poszero,1) != 0 ); +} + +int allnegzero_double2( vec_double2 x ) +{ + vec_ullong2 negzero = cmpnegzerod2( x ); + return ( spu_extract(negzero,0) != 0 && spu_extract(negzero,1) != 0 ); +} + +int allnan_double2( vec_double2 x ) +{ + vec_ullong2 nan = isnand2 ( x ); + return ( spu_extract(nan,0) != 0 && spu_extract(nan,1) != 0 ); +} + + diff --git a/Extras/simdmathlibrary/spu/tests/testutils.h b/Extras/simdmathlibrary/spu/tests/testutils.h new file mode 100644 index 000000000..f207bc460 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/testutils.h @@ -0,0 +1,85 @@ +/* Common part for SPU SIMD Math library testsuite + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + + +#ifndef _TESTUTILS_H_ +#define _TESTUTILS_H_ + +#include "floatingpoint_tests.h" + +extern unsigned int hide_uint( unsigned int x ); +extern int hide_int( int x ); +extern float hide_float( float x ); +extern double hide_double( double x ); + +extern float make_float( unsigned int x ); +extern unsigned int make_uint( float x ); +extern double make_double( unsigned long long x ); +extern unsigned long long make_ulonglong( double x ); + +extern vec_uint4 bitDiff_f4(vec_float4 ref, vec_float4 vals); +extern unsigned int bitDiff_f(float ref, float val); + +extern vec_ullong2 bitDiff_d2(vec_double2 ref, vec_double2 vals); +extern unsigned long long bitDiff_d(double ref, double val); + +extern vec_uint4 ulpDiff_f4(vec_float4 ref, vec_float4 vals); +extern unsigned int ulpDiff_f(float ref, float val); + +extern vec_ullong2 ulpDiff_d2(vec_double2 ref, vec_double2 vals); +extern unsigned long long ulpDiff_d(double ref, double val); + +extern vec_ullong2 cmpposzerod2( vec_double2 x ); +extern vec_ullong2 cmpnegzerod2( vec_double2 x ); +extern int allequal_int4( vec_int4 x, vec_int4 y ); +static inline int allequal_uint4( vec_uint4 x, vec_uint4 y ) +{ + return allequal_int4 ((vec_int4)x, (vec_int4)y); +} +extern int allequal_llong2( vec_llong2 x, vec_llong2 y ); +static inline int allequal_ullong2( vec_ullong2 x, vec_ullong2 y ) +{ + return allequal_llong2((vec_llong2)x, (vec_llong2)y); +} +extern int allequal_float4( vec_float4 x, vec_float4 y ); +extern int allequal_double2( vec_double2 x, vec_double2 y ); +extern int allequal_llroundf4( llroundf4_t x, llroundf4_t y ); +extern int allequal_ulps_float4( vec_float4 x, vec_float4 y, int tolerance ); +extern int allequal_ulps_double2( vec_double2 x, vec_double2 y, int tolerance ); +extern int allequal_bits_float4( vec_float4 x, vec_float4 y, int tolerance ); +extern int allequal_bits_double2( vec_double2 x, vec_double2 y, int tolerance ); +extern int allposinf_double2( vec_double2 x ); +extern int allneginf_double2( vec_double2 x ); 
+extern int allzerodenorm_double2( vec_double2 x ); +extern int allposzero_double2( vec_double2 x ); +extern int allnegzero_double2( vec_double2 x ); +extern int allnan_double2( vec_double2 x ); + +#endif diff --git a/Extras/simdmathlibrary/spu/tests/truncd2.c b/Extras/simdmathlibrary/spu/tests/truncd2.c new file mode 100644 index 000000000..105a72441 --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/truncd2.c @@ -0,0 +1,145 @@ +/* Test truncd2 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +/** + * + *@@ truncd2 - Round the input to the nearest integer. + * Always rounds towards 0. + * + *@brief + * boundary test for truncd2. + * + * + *@pre + * + *@criteria + * Run this program and check that no errors occur. + * + *@note + * + * + **/ + +#include +#include +#include +//#include +#include + +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + +typedef union { + struct { + double xxx[2]; + double ans[2]; + } dbl; + struct { + unsigned long long xxx[2]; + unsigned long long ans[2]; + } ull; +} TestVec_TruncD2; + +int main() +{ + TestVec_TruncD2 test_a[] = { + { + ull:{ + // 0 -> 0 , -0 -> -0 + {0x0000000000000000ULL,0x8000000000000000ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // -Inf -> -Inf , Inf -> Inf + {0xFFF0000000000000ULL,0x7FF0000000000000ULL}, + {0xFFF0000000000000ULL,0x7FF0000000000000ULL} + } + },{ + ull:{ + // MAX -> MAX , MIN -> MIN + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL}, + {0x7FEFFFFFFFFFFFFFULL,0xFFEFFFFFFFFFFFFFULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x0000000000000001ULL,0x8000000000000010ULL}, + {0x0000000000000000ULL,0x8000000000000000ULL} + } + },{ + ull:{ + // Denormalize -> 0 + {0x800FFFFFFFFFFFFFULL,0x000FFFFFFFFFFFFFULL}, + {0x8000000000000000ULL,0x0000000000000000ULL} + } + },{ + dbl:{ + {1.0, -1.0}, + {1.0, -1.0} + } + },{ + dbl:{ + {-2.5, 3.5}, + {-2.0, 3.0} + } + },{ + ull:{ + // Nan + 
{0xFFF0000000000001ULL,0x7FF0000000000001ULL}, + {0xFFF0000000000001ULL,0x7FF0000000000001ULL} + } + },{ + ull:{ + {0ULL,0ULL}, + {0ULL,0ULL} + } + } + }; + int ii, test_ctr = 1; + char msg[80]; + vec_double2 res_v; + + TEST_SET_START("20060831180000NM","NM", "truncd2"); + + TEST_START("truncd2"); + + for (ii=0; ; ii++) { + if ( (test_a[ii].ull.xxx[0] == 0) && (test_a[ii].ull.xxx[1] == 0) ) break; + + res_v = truncd2 (*((vec_double2 *)&test_a[ii].dbl.xxx[0]) ); + sprintf(msg,"2006083118%04dNM", test_ctr++); + TEST_CHECK(msg, allequal_llong2( (vec_llong2)res_v, (vec_llong2)*((vec_double2 *)&test_a[ii].dbl.ans[0])), 0); + } + + TEST_SET_DONE(); + + TEST_EXIT(); + +} diff --git a/Extras/simdmathlibrary/spu/tests/truncf4.c b/Extras/simdmathlibrary/spu/tests/truncf4.c new file mode 100644 index 000000000..7085ddd4a --- /dev/null +++ b/Extras/simdmathlibrary/spu/tests/truncf4.c @@ -0,0 +1,108 @@ +/* Test truncf4 for SPU + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include +#include "simdmath.h" +#include "common-test.h" +#include "testutils.h" + + +int main() +{ + TEST_SET_START("20040916100012EJL","EJL", "truncf"); + + unsigned int i3 = 0x4affffff; // 2^23 - 0.5, largest truncatable value. + unsigned int i3i = 0x4afffffe; + unsigned int i4 = 0x4b000000; // 2^23, no fractional part. + unsigned int i5 = 0xcf000001; // -2^31, one more large, and negative, value. + + float x0 = hide_float(0.91825f); + float x0i = hide_float(0.0f); + float x1 = hide_float(-0.12958f); + float x1i = hide_float(0.0f); + float x2 = hide_float(-79615.1875f); + float x2i = hide_float(-79615.0f); + float x3 = hide_float(make_float(i3)); + float x3i = hide_float(make_float(i3i)); + float x4 = hide_float(make_float(i4)); + float x4i = hide_float(make_float(i4)); + float x5 = hide_float(make_float(i5)); + float x5i = hide_float(make_float(i5)); + + vec_float4 x0_v = spu_splats(x0); + vec_float4 x0i_v = spu_splats(x0i); + vec_float4 x1_v = spu_splats(x1); + vec_float4 x1i_v = spu_splats(x1i); + vec_float4 x2_v = spu_splats(x2); + vec_float4 x2i_v = spu_splats(x2i); + vec_float4 x3_v = spu_splats(x3); + vec_float4 x3i_v = spu_splats(x3i); + vec_float4 x4_v = spu_splats(x4); + vec_float4 x4i_v = spu_splats(x4i); + vec_float4 x5_v = spu_splats(x5); + vec_float4 x5i_v = spu_splats(x5i); + + float res; + vec_float4 res_v; + + TEST_START("truncf4"); + res_v = truncf4(x0_v); + TEST_CHECK("20040916100023EJL", 
allequal_float4( res_v, x0i_v ), 0); + res_v = truncf4(x1_v); + TEST_CHECK("20040916100034EJL", allequal_float4( res_v, x1i_v ), 0); + res_v = truncf4(x2_v); + TEST_CHECK("20040916100043EJL", allequal_float4( res_v, x2i_v ), 0); + res_v = truncf4(x3_v); + TEST_CHECK("20040916100054EJL", allequal_float4( res_v, x3i_v ), 0); + res_v = truncf4(x4_v); + TEST_CHECK("20040916100103EJL", allequal_float4( res_v, x4i_v ), 0); + res_v = truncf4(x5_v); + TEST_CHECK("20040916100111EJL", allequal_float4( res_v, x5i_v ), 0); + + TEST_START("truncf"); + res = truncf(x0); + TEST_CHECK("20040916100121EJL", res == x0i, 0); + res = truncf(x1); + TEST_CHECK("20040916100129EJL", res == x1i, 0); + res = truncf(x2); + TEST_CHECK("20040916100136EJL", res == x2i, 0); + res = truncf(x3); + TEST_CHECK("20040916100144EJL", res == x3i, 0); + res = truncf(x4); + TEST_CHECK("20040916100153EJL", res == x4i, 0); + res = truncf(x5); + TEST_CHECK("20040916100200EJL", res == x5i, 0); + + TEST_SET_DONE(); + + TEST_EXIT(); +} diff --git a/Extras/simdmathlibrary/spu/truncd2.c b/Extras/simdmathlibrary/spu/truncd2.c new file mode 100644 index 000000000..3d133c53d --- /dev/null +++ b/Extras/simdmathlibrary/spu/truncd2.c @@ -0,0 +1,63 @@ +/* truncd2 - Round the input to the nearest integer. + Always rounds towards 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +vector double +truncd2(vector double in) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 exp, shift; + vec_uint4 sign = ((vec_uint4){ 0x80000000, 0, 0x80000000, 0}); + vec_uint4 or_mask, and_mask, mask; + vec_double2 in_hi, out; + + /* Construct a mask to remove the fraction bits. The mask + * depends on the exponent of the floating point + * input value. + */ + in_hi = spu_shuffle(in, in, splat_hi); + exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF); + + shift = spu_sub(((vec_int4){ 1023, 1043, 1023, 1043}), exp); + or_mask = spu_andc(spu_cmpgt(shift, 0), sign); + + and_mask = spu_rlmask(((vec_uint4){ 0xFFFFF, -1, 0xFFFFF, -1}), shift); + mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -32)), or_mask); + + /* Apply the mask and return the result. 
+ */ + out = spu_andc(in, (vec_double2)(mask)); + + return (out); +} + + diff --git a/Extras/simdmathlibrary/spu/truncf4.c b/Extras/simdmathlibrary/spu/truncf4.c new file mode 100644 index 000000000..11ef30649 --- /dev/null +++ b/Extras/simdmathlibrary/spu/truncf4.c @@ -0,0 +1,48 @@ +/* truncf4 - for each of four float slots, round towards zero to integer value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +vector float +truncf4 (vector float x) +{ + vector signed int xi; + vector unsigned int inrange; + + // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). + // However, no truncation needed if exponent > 22. + + inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + + return spu_sel( x, spu_convtf( xi, 0 ), inrange ); +} +