diff --git a/Extras/simdmathlibrary/LICENSE b/Extras/simdmathlibrary/LICENSE deleted file mode 100644 index 3ceee30c6..000000000 --- a/Extras/simdmathlibrary/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -/* SIMD math library functions for both the PowerPC (PPU) and the SPU. - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ diff --git a/Extras/simdmathlibrary/Makefile b/Extras/simdmathlibrary/Makefile index f08e28d94..4d92a47a8 100644 --- a/Extras/simdmathlibrary/Makefile +++ b/Extras/simdmathlibrary/Makefile @@ -32,15 +32,21 @@ # # make ARCH_PPU=32 # -# To use "gcc" instead of "ppu-gcc". +# To build 64 bit libraries: # -# make CROSS_PPU= +# make ARCH_PPU=64 +# +# To use "ppu-gcc" instead of "gcc". 
+# +# make CROSS_PPU=ppu- prefix = /usr +prefix_ppu = $(prefix) +prefix_spu = $(prefix)/spu DESTDIR = -ARCH_PPU = 64 -CROSS_PPU = ppu- +ARCH_PPU = 32 +CROSS_PPU = AR_PPU = $(CROSS_PPU)ar CC_PPU = $(CROSS_PPU)gcc CXX_PPU = $(CROSS_PPU)g++ @@ -60,9 +66,12 @@ INSTALL = install MAKE_DEFS = \ prefix='$(prefix)' \ + prefix_ppu='$(prefix_ppu)' \ + prefix_spu='$(prefix_spu)' \ DESTDIR='$(DESTDIR)' \ LIB_MAJOR_VERSION='$(LIB_MAJOR_VERSION)' \ LIB_MINOR_VERSION='$(LIB_MINOR_VERSION)' \ + LIB_RELEASE='$(LIB_RELEASE)' \ LIB_BASE='$(LIB_BASE)' \ LIB_NAME='$(LIB_NAME)' \ STATIC_LIB='$(STATIC_LIB)' \ @@ -85,12 +94,17 @@ MAKE_DEFS = \ LIB_MAJOR_VERSION = 1 LIB_MINOR_VERSION = 0 +LIB_RELEASE = 1 +LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE) LIB_BASE = simdmath LIB_NAME = lib$(LIB_BASE) STATIC_LIB = $(LIB_NAME).a SHARED_LIB = $(LIB_NAME).so +TAR_NAME = $(LIB_BASE)-$(LIB_FULL_VERSION) +TAR_BALL = $(TAR_NAME).tar.gz + all: spu_library ppu_library spu_library: @@ -99,7 +113,10 @@ spu_library: ppu_library: cd ppu; $(MAKE) $(MAKE_DEFS) -install: spu_install ppu_install +install: common_install spu_install ppu_install + +common_install: + cd common; $(MAKE) $(MAKE_DEFS) install spu_install: cd spu; $(MAKE) $(MAKE_DEFS) install @@ -108,8 +125,10 @@ ppu_install: cd ppu; $(MAKE) $(MAKE_DEFS) install clean: + cd common; $(MAKE) $(MAKE_DEFS) clean cd spu; $(MAKE) $(MAKE_DEFS) clean cd ppu; $(MAKE) $(MAKE_DEFS) clean + -rm -rf $(TAR_BALL) .dist check: check_ppu check_spu @@ -118,3 +137,14 @@ check_ppu: check_spu: cd spu; $(MAKE) $(MAKE_DEFS) check + +dist: + -rm -rf .dist + mkdir -p .dist/$(TAR_NAME) + find . -name .dist -prune -o \ + -name .CVS -prune -o -name .svn -prune -o \ + -name .pc -prune -o -name patches -prune -o \ + '(' -name Makefile -o -name '*.[ch]' -o -name '*.spec' ')' \ + -print | tar -T - -cf - | tar xf - -C .dist/$(TAR_NAME) + tar zcf $(TAR_BALL) -C .dist $(TAR_NAME) + -rm -rf .dist diff --git a/Extras/simdmathlibrary/common/Makefile b/Extras/simdmathlibrary/common/Makefile new file mode 100644 index 000000000..93f36f360 --- /dev/null +++ b/Extras/simdmathlibrary/common/Makefile @@ -0,0 +1,59 @@ +# make file to build the libsimdmath library for PPU and SPU +# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the +# following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the Sony Computer Entertainment Inc nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +prefix = /usr +prefix_ppu = $(prefix) +prefix_spu = $(prefix)/spu +DESTDIR = + +INSTALL = install + +MAKE_DEFS = \ + prefix='$(prefix)' \ + prefix_ppu='$(prefix_ppu)' \ + prefix_spu='$(prefix_spu)' \ + DESTDIR='$(DESTDIR)' \ + INSTALL='$(INSTALL)' + +TARGET_PREFIX = $(prefix) + +all: + @true + +install: + $(MAKE) $(MAKE_DEFS) TARGET_PREFIX=$(prefix_ppu) do_install + $(MAKE) $(MAKE_DEFS) TARGET_PREFIX=$(prefix_spu) do_install + +do_install: + $(INSTALL) -m 755 -d $(DESTDIR)$(TARGET_PREFIX)/include + $(INSTALL) -m 755 -d $(DESTDIR)$(TARGET_PREFIX)/include/simdmath + $(INSTALL) -m 644 simdmath.h $(DESTDIR)$(TARGET_PREFIX)/include/ + $(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(TARGET_PREFIX)/include/simdmath/ + +clean: + @true diff --git a/Extras/simdmathlibrary/ppu/absi4.c b/Extras/simdmathlibrary/common/absi4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/absi4.c rename to Extras/simdmathlibrary/common/absi4.c index dc0d8b474..220103e5e 100644 --- a/Extras/simdmathlibrary/ppu/absi4.c +++ b/Extras/simdmathlibrary/common/absi4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ - - -#include -#include +#include vector signed int absi4 (vector signed int x) { - return vec_abs( x ); + return _absi4( x ); } - diff --git a/Extras/simdmathlibrary/spu/fmaf4.c b/Extras/simdmathlibrary/common/acosf4.c similarity index 92% rename from Extras/simdmathlibrary/spu/fmaf4.c rename to Extras/simdmathlibrary/common/acosf4.c index 0c54d1e10..f43eacb57 100644 --- a/Extras/simdmathlibrary/spu/fmaf4.c +++ b/Extras/simdmathlibrary/common/acosf4.c @@ -1,4 +1,4 @@ -/* fmaf4 +/* acosf4 - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. All rights reserved. @@ -27,12 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ - -#include -#include +#include vector float -fmaf4 (vector float x, vector float y, vector float z) +acosf4 (vector float x) { - return spu_madd(x,y,z); + return _acosf4( x ); } diff --git a/Extras/simdmathlibrary/common/asinf4.c b/Extras/simdmathlibrary/common/asinf4.c new file mode 100644 index 000000000..b71329238 --- /dev/null +++ b/Extras/simdmathlibrary/common/asinf4.c @@ -0,0 +1,36 @@ +/* asinf4 - Computes the inverse sine of all four slots of x + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +asinf4 (vector float x) +{ + return _asinf4( x ); +} diff --git a/Extras/simdmathlibrary/common/atan2f4.c b/Extras/simdmathlibrary/common/atan2f4.c new file mode 100644 index 000000000..c7282f6f7 --- /dev/null +++ b/Extras/simdmathlibrary/common/atan2f4.c @@ -0,0 +1,36 @@ +/* atan2f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +atan2f4 (vector float y, vector float x) +{ + return _atan2f4( y, x ); +} diff --git a/Extras/simdmathlibrary/common/atanf4.c b/Extras/simdmathlibrary/common/atanf4.c new file mode 100644 index 000000000..0fc50733b --- /dev/null +++ b/Extras/simdmathlibrary/common/atanf4.c @@ -0,0 +1,36 @@ +/* atanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +atanf4 (vector float x) +{ + return _atanf4( x ); +} diff --git a/Extras/simdmathlibrary/common/cbrtf4.c b/Extras/simdmathlibrary/common/cbrtf4.c new file mode 100644 index 000000000..a51e8b1a0 --- /dev/null +++ b/Extras/simdmathlibrary/common/cbrtf4.c @@ -0,0 +1,36 @@ +/* cbrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +cbrtf4 (vector float x) +{ + return _cbrtf4( x ); +} diff --git a/Extras/simdmathlibrary/common/ceild2.c b/Extras/simdmathlibrary/common/ceild2.c new file mode 100644 index 000000000..be4f3ffa2 --- /dev/null +++ b/Extras/simdmathlibrary/common/ceild2.c @@ -0,0 +1,36 @@ +/* ceild2 - for each of two double slots, round up to smallest integer not less than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+ All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +ceild2(vector double in) +{ + return _ceild2(in); +} diff --git a/Extras/simdmathlibrary/ppu/ceilf4.c b/Extras/simdmathlibrary/common/ceilf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/ceilf4.c rename to Extras/simdmathlibrary/common/ceilf4.c index ebdc03f50..d5de69d8b 100644 --- a/Extras/simdmathlibrary/ppu/ceilf4.c +++ b/Extras/simdmathlibrary/common/ceilf4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - +#include vector float ceilf4 (vector float x) { - return vec_ceil( x ); + return _ceilf4( x ); } - diff --git a/Extras/simdmathlibrary/ppu/tests/common-test.h b/Extras/simdmathlibrary/common/common-test.h similarity index 91% rename from Extras/simdmathlibrary/ppu/tests/common-test.h rename to Extras/simdmathlibrary/common/common-test.h index c0da56001..175effa6e 100644 --- a/Extras/simdmathlibrary/ppu/tests/common-test.h +++ b/Extras/simdmathlibrary/common/common-test.h @@ -30,7 +30,24 @@ #include -static inline unsigned long long clock() +#ifdef __SPU__ + +#include + +static inline unsigned long long clock_start(void) +{ + spu_writech(SPU_WrDec, 0); + return -spu_readch(SPU_RdDec); +} + +static inline unsigned long long clock_stop(void) +{ + return -spu_readch(SPU_RdDec); +} + +#else + +static inline unsigned long long clock(void) { unsigned long long ret; /* This need to be fixed for the hardware errata. 
*/ @@ -40,6 +57,19 @@ static inline unsigned long long clock() : "memory"); return (ret); } + +static inline unsigned long long clock_start(void) +{ + return clock(); +} + +static inline unsigned long long clock_stop(void) +{ + return clock(); +} + +#endif + // Test files begin with TEST_SET_START("your initials","test set description") // Individual tests begin with TEST_START("name of test") // and end with TEST_PASS(), TEST_FAIL("reason for failure") or TEST_CHECK() @@ -77,9 +107,9 @@ unsigned long long __description=description; \ __count=0; \ __passed=0; \ - __time=0; \ __ttime=0; \ printf("0\t%s\t%d\t%s\tSTART\tpassed\ttotal\ttime\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,__set_id, __description); \ + __time = clock_start(); \ } while(0) // TEST_START @@ -87,9 +117,9 @@ unsigned long long // name - brief name for this test #define TEST_START(name) \ do { \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + (void)clock_stop(); \ __name=name; \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __time = clock_start(); \ } while(0) // TEST_PASS @@ -98,13 +128,13 @@ unsigned long long // This should match the id provided to the matching TEST_FAIL call #define TEST_PASS(test_id) \ do { \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __ttemp=clock_stop(); \ __time=__ttemp-__time; \ __ttime+=__time; \ __count++; \ __passed++; \ printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __time=clock_start(); \ } while(0) // TEST_FAIL @@ -114,13 +144,13 @@ unsigned long long // why - brief description of why it failed #define TEST_FAIL(test_id,why,error_code) \ do { \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __ttemp=clock_stop(); \ __time=__ttemp-__time; \ __ttime+=__time; \ __count++; \ __success=0; \ printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,error_code); \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __time=clock_start(); \ } while(0) // TEST_CHECK @@ -131,7 +161,7 @@ unsigned long long // test - expression evaluating to true/false #define TEST_CHECK(test_id,test,error_code) \ do { \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __ttemp=clock_stop(); \ __time=__ttemp-__time; \ __ttime+=__time; \ __count++; \ @@ -145,7 +175,7 @@ unsigned long long __success=0; \ printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: check %s failed\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,#test,error_code); \ } \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __time=clock_start(); \ } while(0) // TEST_FUNCTION @@ -159,7 +189,7 @@ unsigned long long do { \ TEST_START(name); \ int result=func; \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \ + __ttemp=clock_stop(); \ __time=__ttemp-__time; \ __ttime+=__time; \ __count++; \ @@ -173,7 +203,7 @@ unsigned long long __success=0; \ printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,result); \ } \ - __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \ + __time=clock_start(); \ } while(0) // TEST_SET_DONE diff --git 
a/Extras/simdmathlibrary/spu/copysignd2.c b/Extras/simdmathlibrary/common/copysignd2.c similarity index 90% rename from Extras/simdmathlibrary/spu/copysignd2.c rename to Extras/simdmathlibrary/common/copysignd2.c index 335271ff2..0321c1ef6 100644 --- a/Extras/simdmathlibrary/spu/copysignd2.c +++ b/Extras/simdmathlibrary/common/copysignd2.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ +#include -#include -#include - - -vector double copysignd2 (vector double x, vector double y) +vector double +copysignd2 (vector double x, vector double y) { - return spu_sel( x, y, spu_splats(0x8000000000000000ull) ); + return _copysignd2(x, y); } - diff --git a/Extras/simdmathlibrary/spu/copysignf4.c b/Extras/simdmathlibrary/common/copysignf4.c similarity index 94% rename from Extras/simdmathlibrary/spu/copysignf4.c rename to Extras/simdmathlibrary/common/copysignf4.c index d58f6c1b5..88575310b 100644 --- a/Extras/simdmathlibrary/spu/copysignf4.c +++ b/Extras/simdmathlibrary/common/copysignf4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - +#include vector float copysignf4 (vector float x, vector float y) { - return spu_sel( x, y, spu_splats(0x80000000) ); + return _copysignf4( x, y ); } - diff --git a/Extras/simdmathlibrary/common/cosd2.c b/Extras/simdmathlibrary/common/cosd2.c new file mode 100644 index 000000000..8e9167f18 --- /dev/null +++ b/Extras/simdmathlibrary/common/cosd2.c @@ -0,0 +1,36 @@ +/* cosd2 - Computes the cosine of the each of two double slots. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +cosd2 (vector double x) +{ + return _cosd2(x); +} diff --git a/Extras/simdmathlibrary/common/cosf4.c b/Extras/simdmathlibrary/common/cosf4.c new file mode 100644 index 000000000..1570c5ee5 --- /dev/null +++ b/Extras/simdmathlibrary/common/cosf4.c @@ -0,0 +1,36 @@ +/* cosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +cosf4 (vector float x) +{ + return _cosf4( x ); +} diff --git a/Extras/simdmathlibrary/spu/divd2.c b/Extras/simdmathlibrary/common/divd2.c similarity index 90% rename from Extras/simdmathlibrary/spu/divd2.c rename to Extras/simdmathlibrary/common/divd2.c index f52da5d51..4c18c2d32 100644 --- a/Extras/simdmathlibrary/spu/divd2.c +++ b/Extras/simdmathlibrary/common/divd2.c @@ -27,15 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -// Equal to numer * recipd2(denom) -// See recipd2 for results of special values. - -#include -#include +#include vector double divd2 (vector double numer, vector double denom) { - return spu_mul( numer, recipd2( denom ) ); + return _divd2(numer, denom); } - diff --git a/Extras/simdmathlibrary/common/divf4.c b/Extras/simdmathlibrary/common/divf4.c new file mode 100644 index 000000000..45aa122ea --- /dev/null +++ b/Extras/simdmathlibrary/common/divf4.c @@ -0,0 +1,36 @@ +/* divf4 - for each of four float slots, divide numer by denom. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +divf4 (vector float numer, vector float denom) +{ + return _divf4( numer, denom ); +} diff --git a/Extras/simdmathlibrary/common/divi4.c b/Extras/simdmathlibrary/common/divi4.c new file mode 100644 index 000000000..7d2910a25 --- /dev/null +++ b/Extras/simdmathlibrary/common/divi4.c @@ -0,0 +1,36 @@ +/* divi4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +divi4_t +divi4 (vector signed int numer, vector signed int denom ) +{ + return _divi4( numer, denom ); +} diff --git a/Extras/simdmathlibrary/common/divu4.c b/Extras/simdmathlibrary/common/divu4.c new file mode 100644 index 000000000..011030030 --- /dev/null +++ b/Extras/simdmathlibrary/common/divu4.c @@ -0,0 +1,36 @@ +/* divu4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +divu4_t +divu4 (vector unsigned int numer, vector unsigned int denom) +{ + return _divu4(numer, denom); +} diff --git a/Extras/simdmathlibrary/common/exp2f4.c b/Extras/simdmathlibrary/common/exp2f4.c new file mode 100644 index 000000000..8e07fb0c8 --- /dev/null +++ b/Extras/simdmathlibrary/common/exp2f4.c @@ -0,0 +1,36 @@ +/* exp2f4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +exp2f4 (vector float x) +{ + return _exp2f4( x ); +} diff --git a/Extras/simdmathlibrary/common/expf4.c b/Extras/simdmathlibrary/common/expf4.c new file mode 100644 index 000000000..3950d4b62 --- /dev/null +++ b/Extras/simdmathlibrary/common/expf4.c @@ -0,0 +1,36 @@ +/* expf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +expf4 (vector float x) +{ + return _expf4( x ); +} diff --git a/Extras/simdmathlibrary/common/expm1f4.c b/Extras/simdmathlibrary/common/expm1f4.c new file mode 100644 index 000000000..faa429013 --- /dev/null +++ b/Extras/simdmathlibrary/common/expm1f4.c @@ -0,0 +1,36 @@ +/* expm1f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +vector float +expm1f4 (vector float x) +{ + return _expm1f4( x ); +} diff --git a/Extras/simdmathlibrary/spu/fabsd2.c b/Extras/simdmathlibrary/common/fabsd2.c similarity index 90% rename from Extras/simdmathlibrary/spu/fabsd2.c rename to Extras/simdmathlibrary/common/fabsd2.c index 26b155f59..e38e333fc 100644 --- a/Extras/simdmathlibrary/spu/fabsd2.c +++ b/Extras/simdmathlibrary/common/fabsd2.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include - -vector double fabsd2 (vector double x) +vector double +fabsd2 (vector double x) { - return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + return _fabsd2(x); } diff --git a/Extras/simdmathlibrary/ppu/fabsf4.c b/Extras/simdmathlibrary/common/fabsf4.c similarity index 94% rename from Extras/simdmathlibrary/ppu/fabsf4.c rename to Extras/simdmathlibrary/common/fabsf4.c index 4c0aa46e7..2fd7d2249 100644 --- a/Extras/simdmathlibrary/ppu/fabsf4.c +++ b/Extras/simdmathlibrary/common/fabsf4.c @@ -27,12 +27,11 @@ POSSIBILITY OF SUCH DAMAGE. */ +#include - -#include -#include - -vector float fabsf4(vector float x) +vector float +fabsf4 (vector float x) { - return vec_abs( x ); + return _fabsf4( x ); } diff --git a/Extras/simdmathlibrary/spu/fdimd2.c b/Extras/simdmathlibrary/common/fdimd2.c similarity index 83% rename from Extras/simdmathlibrary/spu/fdimd2.c rename to Extras/simdmathlibrary/common/fdimd2.c index 12e7a3484..447f57b63 100644 --- a/Extras/simdmathlibrary/spu/fdimd2.c +++ b/Extras/simdmathlibrary/common/fdimd2.c @@ -27,20 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include -/* fdim_v - compute the positive difference of x and y. - */ vector double fdimd2 (vector double x, vector double y) { - vec_double2 v; - vec_uint4 mask; - - v = spu_sub(x, y); - mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); - v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31)); - - return (v); + return _fdimd2(x, y); } diff --git a/Extras/simdmathlibrary/ppu/fdimf4.c b/Extras/simdmathlibrary/common/fdimf4.c similarity index 91% rename from Extras/simdmathlibrary/ppu/fdimf4.c rename to Extras/simdmathlibrary/common/fdimf4.c index 5d230a92b..d476788a1 100644 --- a/Extras/simdmathlibrary/ppu/fdimf4.c +++ b/Extras/simdmathlibrary/common/fdimf4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - +#include vector float fdimf4 (vector float x, vector float y) { - vector float diff = vec_sub(x,y); - return vec_sel(((vector float){0.0f, 0.0f, 0.0f, 0.0f}), diff, vec_cmpgt(x,y)); + return _fdimf4( x, y ); } diff --git a/Extras/simdmathlibrary/common/floord2.c b/Extras/simdmathlibrary/common/floord2.c new file mode 100644 index 000000000..0dec5d5c2 --- /dev/null +++ b/Extras/simdmathlibrary/common/floord2.c @@ -0,0 +1,36 @@ +/* floord2 - for each of two double slots, round down to the largest integer not greater than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +floord2(vector double in) +{ + return _floord2(in); +} diff --git a/Extras/simdmathlibrary/ppu/floorf4.c b/Extras/simdmathlibrary/common/floorf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/floorf4.c rename to Extras/simdmathlibrary/common/floorf4.c index d3d81663e..3ef6e6f60 100644 --- a/Extras/simdmathlibrary/ppu/floorf4.c +++ b/Extras/simdmathlibrary/common/floorf4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - +#include vector float floorf4 (vector float x) { - return vec_floor( x ); + return _floorf4( x ); } - diff --git a/Extras/simdmathlibrary/spu/fmad2.c b/Extras/simdmathlibrary/common/fmad2.c similarity index 95% rename from Extras/simdmathlibrary/spu/fmad2.c rename to Extras/simdmathlibrary/common/fmad2.c index 32e7e6c18..b470080cf 100644 --- a/Extras/simdmathlibrary/spu/fmad2.c +++ b/Extras/simdmathlibrary/common/fmad2.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include vector double fmad2 (vector double x, vector double y, vector double z) { - return spu_madd(x,y,z); + return _fmad2(x, y, z); } diff --git a/Extras/simdmathlibrary/ppu/fmaf4.c b/Extras/simdmathlibrary/common/fmaf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/fmaf4.c rename to Extras/simdmathlibrary/common/fmaf4.c index 7bf2f62ea..b26afbcb8 100644 --- a/Extras/simdmathlibrary/ppu/fmaf4.c +++ b/Extras/simdmathlibrary/common/fmaf4.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include vector float fmaf4 (vector float x, vector float y, vector float z) { - return vec_madd(x,y,z); + return _fmaf4( x, y, z ); } diff --git a/Extras/simdmathlibrary/common/fmaxd2.c b/Extras/simdmathlibrary/common/fmaxd2.c new file mode 100644 index 000000000..1380f280e --- /dev/null +++ b/Extras/simdmathlibrary/common/fmaxd2.c @@ -0,0 +1,36 @@ +/* fmaxd2 - for each of two double slots, compute maximum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +fmaxd2 (vector double x, vector double y) +{ + return _fmaxd2(x, y); +} diff --git a/Extras/simdmathlibrary/ppu/fmaxf4.c b/Extras/simdmathlibrary/common/fmaxf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/fmaxf4.c rename to Extras/simdmathlibrary/common/fmaxf4.c index a236182cd..ca5169274 100644 --- a/Extras/simdmathlibrary/ppu/fmaxf4.c +++ b/Extras/simdmathlibrary/common/fmaxf4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ - -#include -#include - +#include vector float fmaxf4 (vector float x, vector float y) { - return vec_max( x, y ); + return _fmaxf4( x, y ); } - diff --git a/Extras/simdmathlibrary/common/fmind2.c b/Extras/simdmathlibrary/common/fmind2.c new file mode 100644 index 000000000..b30242f71 --- /dev/null +++ b/Extras/simdmathlibrary/common/fmind2.c @@ -0,0 +1,36 @@ +/* fmind2 - for each of two double slots, compute minimum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +vector double +fmind2 (vector double x, vector double y) +{ + return _fmind2(x, y); +} diff --git a/Extras/simdmathlibrary/ppu/fminf4.c b/Extras/simdmathlibrary/common/fminf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/fminf4.c rename to Extras/simdmathlibrary/common/fminf4.c index ac877827c..6bc2db8d4 100644 --- a/Extras/simdmathlibrary/ppu/fminf4.c +++ b/Extras/simdmathlibrary/common/fminf4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - +#include vector float fminf4 (vector float x, vector float y) { - return vec_min( x, y ); + return _fminf4( x, y ); } - diff --git a/Extras/simdmathlibrary/common/fmodd2.c b/Extras/simdmathlibrary/common/fmodd2.c new file mode 100644 index 000000000..7428a9ab2 --- /dev/null +++ b/Extras/simdmathlibrary/common/fmodd2.c @@ -0,0 +1,36 @@ +/* fmodd2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +fmodd2(vector double x, vector double y) +{ + return _fmodd2(x, y); +} diff --git a/Extras/simdmathlibrary/common/fmodf4.c b/Extras/simdmathlibrary/common/fmodf4.c new file mode 100644 index 000000000..d5446b44b --- /dev/null +++ b/Extras/simdmathlibrary/common/fmodf4.c @@ -0,0 +1,36 @@ +/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +fmodf4 (vector float x, vector float y) +{ + return _fmodf4( x, y ); +} diff --git a/Extras/simdmathlibrary/common/fpclassifyd2.c b/Extras/simdmathlibrary/common/fpclassifyd2.c new file mode 100644 index 000000000..b0f55bea6 --- /dev/null +++ b/Extras/simdmathlibrary/common/fpclassifyd2.c @@ -0,0 +1,36 @@ +/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +vector signed long long +fpclassifyd2 (vector double x) +{ + return _fpclassifyd2(x); +} diff --git a/Extras/simdmathlibrary/common/fpclassifyf4.c b/Extras/simdmathlibrary/common/fpclassifyf4.c new file mode 100644 index 000000000..1bc07c67a --- /dev/null +++ b/Extras/simdmathlibrary/common/fpclassifyf4.c @@ -0,0 +1,36 @@ +/* fpclassifyf4 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector signed int +fpclassifyf4 (vector float x) +{ + return _fpclassifyf4(x); +} diff --git a/Extras/simdmathlibrary/common/frexpd2.c b/Extras/simdmathlibrary/common/frexpd2.c new file mode 100644 index 000000000..c950cacbe --- /dev/null +++ b/Extras/simdmathlibrary/common/frexpd2.c @@ -0,0 +1,36 @@ +/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
diff --git a/Extras/simdmathlibrary/common/frexpf4.c b/Extras/simdmathlibrary/common/frexpf4.c new file mode 100644 index 000000000..f65c52079 --- /dev/null +++ b/Extras/simdmathlibrary/common/frexpf4.c @@ -0,0 +1,36 @@ +/* frexpf4 - for each element of vector x, return the normalized fraction and store the exponent of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/frexpf4.h> + +vector float +frexpf4 (vector float x, vector signed int *exp) +{ + return _frexpf4( x, exp ); +} diff --git a/Extras/simdmathlibrary/spu/hypotd2.c b/Extras/simdmathlibrary/common/hypotd2.c similarity index 92% rename from Extras/simdmathlibrary/spu/hypotd2.c rename to Extras/simdmathlibrary/common/hypotd2.c index f45580bd0..ab8edcae6 --- a/Extras/simdmathlibrary/spu/hypotd2.c +++ b/Extras/simdmathlibrary/common/hypotd2.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/hypotd2.h> vector double hypotd2 (vector double x, vector double y) { - vec_double2 sum = spu_mul(x,x); - sum = spu_madd(y,y,sum); - - return sqrtd2(sum); + return _hypotd2(x, y); } diff --git a/Extras/simdmathlibrary/common/hypotf4.c b/Extras/simdmathlibrary/common/hypotf4.c new file mode 100644 index 000000000..7bcf73c3b --- /dev/null +++ b/Extras/simdmathlibrary/common/hypotf4.c @@ -0,0 +1,36 @@ +/* hypotf4 - for each element of vectors x and y, return the square root of x'*x' + y'*y' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/hypotf4.h> + +vector float +hypotf4 (vector float x, vector float y) +{ + return _hypotf4( x, y ); +} diff --git a/Extras/simdmathlibrary/common/ilogbd2.c b/Extras/simdmathlibrary/common/ilogbd2.c new file mode 100644 index 000000000..c0a74521d --- /dev/null +++ b/Extras/simdmathlibrary/common/ilogbd2.c @@ -0,0 +1,36 @@ +/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/ilogbd2.h> + +vector signed long long +ilogbd2 (vector double x) +{ + return _ilogbd2(x); +}
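ilogbd2's per-slot contract matches C99 ilogb: the unbiased binary exponent for normal values, FP_ILOGB0 for zero and FP_ILOGBNAN for NaN, as the header comment says. A scalar reference (illustrative only; the vector form returns these values in signed long long slots):

#include <math.h>
#include <stdio.h>

int main(void)
{
  printf("%d\n", ilogb(24.0));                /* 4, since 24 = 1.5 * 2^4 */
  printf("%d\n", ilogb(0.0) == FP_ILOGB0);    /* 1 */
  printf("%d\n", ilogb(NAN) == FP_ILOGBNAN);  /* 1 */
  return 0;
}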
diff --git a/Extras/simdmathlibrary/common/ilogbf4.c b/Extras/simdmathlibrary/common/ilogbf4.c new file mode 100644 index 000000000..0dd3a0268 --- /dev/null +++ b/Extras/simdmathlibrary/common/ilogbf4.c @@ -0,0 +1,36 @@ +/* ilogbf4 - for each element of vector x, return integer exponent of normalized float x', FP_ILOGBNAN, or FP_ILOGB0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/ilogbf4.h> + +vector signed int +ilogbf4 (vector float x) +{ + return _ilogbf4( x ); +} diff --git a/Extras/simdmathlibrary/spu/irintf4.c b/Extras/simdmathlibrary/common/irintf4.c similarity index 93% rename from Extras/simdmathlibrary/spu/irintf4.c rename to Extras/simdmathlibrary/common/irintf4.c index 404bda09f..2378b67c5 --- a/Extras/simdmathlibrary/spu/irintf4.c +++ b/Extras/simdmathlibrary/common/irintf4.c @@ -30,10 +30,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/irintf4.h> -vector signed int irintf4(vector float in) +vector signed int +irintf4(vector float in) { - return spu_convts(in,0); + return _irintf4(in); } diff --git a/Extras/simdmathlibrary/spu/iroundf4.c b/Extras/simdmathlibrary/common/iroundf4.c similarity index 77% rename from Extras/simdmathlibrary/spu/iroundf4.c rename to Extras/simdmathlibrary/common/iroundf4.c index e60494330..8f2f4c8c8 --- a/Extras/simdmathlibrary/spu/iroundf4.c +++ b/Extras/simdmathlibrary/common/iroundf4.c @@ -29,27 +29,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/iroundf4.h> -vector signed int iroundf4(vector float in) +vector signed int +iroundf4(vector float in) { - vec_int4 exp, out; - vec_uint4 addend; - - /* Add 0.5 (fixed precision to eliminate rounding issues - */ - exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF)); - - addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp), - spu_cmpgt((vec_uint4)exp, -31)); - - in = (vec_float4)spu_add((vec_uint4)in, addend); - - - /* Truncate the result. - */ - out = spu_convts(in,0); - - return (out); + return _iroundf4(in); } diff --git a/Extras/simdmathlibrary/spu/is0denormd2.c b/Extras/simdmathlibrary/common/is0denormd2.c similarity index 81% rename from Extras/simdmathlibrary/spu/is0denormd2.c rename to Extras/simdmathlibrary/common/is0denormd2.c index a4b2fa706..75496664c --- a/Extras/simdmathlibrary/spu/is0denormd2.c +++ b/Extras/simdmathlibrary/common/is0denormd2.c @@ -27,20 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> - +#include <simdmath/is0denormd2.h> vector unsigned long long is0denormd2 (vector double x) { - vec_double2 xexp; - vec_ullong2 cmp; - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - - xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) ); - cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) ); - cmp = spu_shuffle( cmp, cmp, even ); - - return cmp; + return _is0denormd2(x); } diff --git a/Extras/simdmathlibrary/spu/is0denormf4.c b/Extras/simdmathlibrary/common/is0denormf4.c similarity index 90% rename from Extras/simdmathlibrary/spu/is0denormf4.c rename to Extras/simdmathlibrary/common/is0denormf4.c index 1d1b4f2d8..9e665b12a --- a/Extras/simdmathlibrary/spu/is0denormf4.c +++ b/Extras/simdmathlibrary/common/is0denormf4.c @@ -26,12 +26,10 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/is0denormf4.h> vector unsigned int is0denormf4 (vector float x) { - return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) ); + return _is0denormf4(x); } diff --git a/Extras/simdmathlibrary/common/isequald2.c b/Extras/simdmathlibrary/common/isequald2.c new file mode 100644 index 000000000..eeda92f76 --- /dev/null +++ b/Extras/simdmathlibrary/common/isequald2.c @@ -0,0 +1,35 @@ +/* isequald2 - for each of two double slots, if x = y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +#include <simdmath/isequald2.h> + +vector unsigned long long +isequald2 (vector double x, vector double y) +{ + return _isequald2(x, y); +} diff --git a/Extras/simdmathlibrary/spu/isequalf4.c b/Extras/simdmathlibrary/common/isequalf4.c similarity index 95% rename from Extras/simdmathlibrary/spu/isequalf4.c rename to Extras/simdmathlibrary/common/isequalf4.c index 36b147463..0bc43ba9f --- a/Extras/simdmathlibrary/spu/isequalf4.c +++ b/Extras/simdmathlibrary/common/isequalf4.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isequalf4.h> vector unsigned int isequalf4 (vector float x, vector float y) -{ - return spu_cmpeq(x, y); +{ + return _isequalf4(x, y); } diff --git a/Extras/simdmathlibrary/spu/isfinited2.c b/Extras/simdmathlibrary/common/isfinited2.c similarity index 79% rename from Extras/simdmathlibrary/spu/isfinited2.c rename to Extras/simdmathlibrary/common/isfinited2.c index 6c3de03d0..2ccc0a870 --- a/Extras/simdmathlibrary/spu/isfinited2.c +++ b/Extras/simdmathlibrary/common/isfinited2.c @@ -27,21 +27,11 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isfinited2.h> vector unsigned long long isfinited2 (vector double x) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 cmpr; - - //Finite unless NaN or Inf, check for 'not all-ones exponent' - - cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ); - cmpr = spu_shuffle( cmpr, cmpr, even); - - return cmpr; + return _isfinited2(x); } diff --git a/Extras/simdmathlibrary/spu/isfinitef4.c b/Extras/simdmathlibrary/common/isfinitef4.c similarity index 91% rename from Extras/simdmathlibrary/spu/isfinitef4.c rename to Extras/simdmathlibrary/common/isfinitef4.c index 50c8cd68d..52d37a139 --- a/Extras/simdmathlibrary/spu/isfinitef4.c +++ b/Extras/simdmathlibrary/common/isfinitef4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isfinitef4.h> vector unsigned int isfinitef4 (vector float x) { - (void)x; - - // NaN, INF not supported on SPU, result always a mask of ones - return spu_splats((unsigned int)0xffffffff); + return _isfinitef4(x); } diff --git a/Extras/simdmathlibrary/common/isgreaterd2.c b/Extras/simdmathlibrary/common/isgreaterd2.c new file mode 100644 index 000000000..ff9fd0db6 --- /dev/null +++ b/Extras/simdmathlibrary/common/isgreaterd2.c @@ -0,0 +1,36 @@ +/* isgreaterd2 - for each of two double slots, if x > y return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/isgreaterd2.h> + +vector unsigned long long +isgreaterd2 (vector double x, vector double y) +{ + return _isgreaterd2(x, y); +} diff --git a/Extras/simdmathlibrary/common/isgreaterequald2.c b/Extras/simdmathlibrary/common/isgreaterequald2.c new file mode 100644 index 000000000..d8a9c4515 --- /dev/null +++ b/Extras/simdmathlibrary/common/isgreaterequald2.c @@ -0,0 +1,36 @@ +/* isgreaterequald2 - for each of two double slots, if x is greater or equal to y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/isgreaterequald2.h> + +vector unsigned long long +isgreaterequald2 (vector double x, vector double y) +{ + return _isgreaterequald2(x, y); +}
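All of these is*d2 predicates return an all-ones or all-zero 64-bit mask per slot rather than a boolean, and NaN operands compare unordered, so any ordered comparison involving a NaN yields a zero mask. A hypothetical scalar model of one isgreaterequald2 slot (my sketch, not library code; slot_isgreaterequal is an invented name):

#include <stdint.h>
#include <math.h>

/* hypothetical model of a single isgreaterequald2 slot */
static uint64_t slot_isgreaterequal(double x, double y)
{
  /* unordered (either operand NaN) gives 0; otherwise x >= y gives all ones */
  return (!isunordered(x, y) && x >= y) ? ~(uint64_t)0 : 0;
}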
diff --git a/Extras/simdmathlibrary/spu/isgreaterequalf4.c b/Extras/simdmathlibrary/common/isgreaterequalf4.c similarity index 93% rename from Extras/simdmathlibrary/spu/isgreaterequalf4.c rename to Extras/simdmathlibrary/common/isgreaterequalf4.c index 886c02e25..e06dd0ff4 --- a/Extras/simdmathlibrary/spu/isgreaterequalf4.c +++ b/Extras/simdmathlibrary/common/isgreaterequalf4.c @@ -27,15 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isgreaterequalf4.h> vector unsigned int isgreaterequalf4 (vector float x, vector float y) -{ - vec_uint4 var; - - var = spu_cmpgt(y, x); - - return spu_nor(var, var); +{ + return _isgreaterequalf4(x, y); } diff --git a/Extras/simdmathlibrary/spu/isgreaterf4.c b/Extras/simdmathlibrary/common/isgreaterf4.c similarity index 95% rename from Extras/simdmathlibrary/spu/isgreaterf4.c rename to Extras/simdmathlibrary/common/isgreaterf4.c index 52f049e17..22978988d --- a/Extras/simdmathlibrary/spu/isgreaterf4.c +++ b/Extras/simdmathlibrary/common/isgreaterf4.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isgreaterf4.h> vector unsigned int isgreaterf4 (vector float x, vector float y) -{ - return spu_cmpgt(x, y); +{ + return _isgreaterf4(x, y); } diff --git a/Extras/simdmathlibrary/spu/isinfd2.c b/Extras/simdmathlibrary/common/isinfd2.c similarity index 79% rename from Extras/simdmathlibrary/spu/isinfd2.c rename to Extras/simdmathlibrary/common/isinfd2.c index c266cbdb9..500fa2640 --- a/Extras/simdmathlibrary/spu/isinfd2.c +++ b/Extras/simdmathlibrary/common/isinfd2.c @@ -27,21 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> - +#include <simdmath/isinfd2.h> vector unsigned long long isinfd2 (vector double x) { - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_double2 xabs; - vec_ullong2 cmp; - - xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); - cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) ); - cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) ); - - return cmp; + return _isinfd2(x); } - diff --git a/Extras/simdmathlibrary/spu/isinff4.c b/Extras/simdmathlibrary/common/isinff4.c similarity index 91% rename from Extras/simdmathlibrary/spu/isinff4.c rename to Extras/simdmathlibrary/common/isinff4.c index bf37bfeb7..15afc8b51 --- a/Extras/simdmathlibrary/spu/isinff4.c +++ b/Extras/simdmathlibrary/common/isinff4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isinff4.h> vector unsigned int isinff4 (vector float x) -{ - (void)x; - - // INF not supported on SPU, result always zero - return spu_splats((unsigned int)0x00000000); +{ + return _isinff4(x); } diff --git a/Extras/simdmathlibrary/common/islessd2.c b/Extras/simdmathlibrary/common/islessd2.c new file mode 100644 index 000000000..40aecb8a1 --- /dev/null +++ b/Extras/simdmathlibrary/common/islessd2.c @@ -0,0 +1,36 @@ +/* islessd2 - for each of two double slots, if x < y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/islessd2.h> + +vector unsigned long long +islessd2 (vector double x, vector double y) +{ + return _islessd2(x, y); +} diff --git a/Extras/simdmathlibrary/common/islessequald2.c b/Extras/simdmathlibrary/common/islessequald2.c new file mode 100644 index 000000000..983e7491b --- /dev/null +++ b/Extras/simdmathlibrary/common/islessequald2.c @@ -0,0 +1,36 @@ +/* islessequald2 - for each of two double slots, if x <= y return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/islessequald2.h> + +vector unsigned long long +islessequald2 (vector double x, vector double y) +{ + return _islessequald2(x, y); +} diff --git a/Extras/simdmathlibrary/spu/islessequalf4.c b/Extras/simdmathlibrary/common/islessequalf4.c similarity index 93% rename from Extras/simdmathlibrary/spu/islessequalf4.c rename to Extras/simdmathlibrary/common/islessequalf4.c index cf3459fa7..0eb7d7c1d --- a/Extras/simdmathlibrary/spu/islessequalf4.c +++ b/Extras/simdmathlibrary/common/islessequalf4.c @@ -27,15 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/islessequalf4.h> vector unsigned int islessequalf4 (vector float x, vector float y) -{ - vec_uint4 var; - - var = spu_cmpgt(x, y); - - return spu_nor(var, var); +{ + return _islessequalf4(x, y); } diff --git a/Extras/simdmathlibrary/spu/islessf4.c b/Extras/simdmathlibrary/common/islessf4.c similarity index 95% rename from Extras/simdmathlibrary/spu/islessf4.c rename to Extras/simdmathlibrary/common/islessf4.c index 55921dd53..e6d99f02b --- a/Extras/simdmathlibrary/spu/islessf4.c +++ b/Extras/simdmathlibrary/common/islessf4.c @@ -27,11 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/islessf4.h> vector unsigned int islessf4 (vector float x, vector float y) -{ - return spu_cmpgt(y, x); +{ + return _islessf4(x, y); } diff --git a/Extras/simdmathlibrary/common/islessgreaterd2.c b/Extras/simdmathlibrary/common/islessgreaterd2.c new file mode 100644 index 000000000..af4e5d812 --- /dev/null +++ b/Extras/simdmathlibrary/common/islessgreaterd2.c @@ -0,0 +1,36 @@ +/* islessgreaterd2 - for each of two double slots, if x is less or greater than y return a mask of ones, else zero + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/islessgreaterd2.h> + +vector unsigned long long +islessgreaterd2 (vector double x, vector double y) +{ + return _islessgreaterd2(x, y); +}
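The reason these comparisons return masks is branch-free selection: the result can feed a bitwise select directly. A sketch of the idiom on the PPU side (my illustration; assumes AltiVec's vec_sel and the f4 predicates declared in simdmath.h, and clamp_to_limit is an invented helper name; the SPU equivalent would use spu_sel):

#include <altivec.h>
#include <simdmath.h>

/* clamp each float slot of v to at most limit, with no branches */
static vector float clamp_to_limit(vector float v, vector float limit)
{
  vector unsigned int gt = isgreaterf4(v, limit); /* all ones where v > limit */
  return vec_sel(v, limit, gt);                   /* take limit in those slots */
}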
diff --git a/Extras/simdmathlibrary/spu/islessgreaterf4.c b/Extras/simdmathlibrary/common/islessgreaterf4.c similarity index 93% rename from Extras/simdmathlibrary/spu/islessgreaterf4.c rename to Extras/simdmathlibrary/common/islessgreaterf4.c index 65ee77e20..acb9dee9a --- a/Extras/simdmathlibrary/spu/islessgreaterf4.c +++ b/Extras/simdmathlibrary/common/islessgreaterf4.c @@ -27,15 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/islessgreaterf4.h> vector unsigned int islessgreaterf4 (vector float x, vector float y) -{ - vec_uint4 var; - - var = spu_cmpeq(x, y); - - return spu_nor(var, var); +{ + return _islessgreaterf4(x, y); } diff --git a/Extras/simdmathlibrary/common/isnand2.c b/Extras/simdmathlibrary/common/isnand2.c new file mode 100644 index 000000000..cea82235b --- /dev/null +++ b/Extras/simdmathlibrary/common/isnand2.c @@ -0,0 +1,36 @@ +/* isnand2 - for each of two double slots, if input is any type of NaN return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/isnand2.h> + +vector unsigned long long +isnand2 (vector double x) +{ + return _isnand2(x); +} diff --git a/Extras/simdmathlibrary/spu/isnanf4.c b/Extras/simdmathlibrary/common/isnanf4.c similarity index 91% rename from Extras/simdmathlibrary/spu/isnanf4.c rename to Extras/simdmathlibrary/common/isnanf4.c index 39827b148..2e0f8fe4a --- a/Extras/simdmathlibrary/spu/isnanf4.c +++ b/Extras/simdmathlibrary/common/isnanf4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isnanf4.h> vector unsigned int isnanf4 (vector float x) -{ - (void)x; - - // NaN not supported on SPU, result always zero - return spu_splats((unsigned int)0x00000000); +{ + return _isnanf4(x); } diff --git a/Extras/simdmathlibrary/common/isnormald2.c b/Extras/simdmathlibrary/common/isnormald2.c new file mode 100644 index 000000000..21a313f7f --- /dev/null +++ b/Extras/simdmathlibrary/common/isnormald2.c @@ -0,0 +1,36 @@ +/* isnormald2 - for each element of vector x, return a mask of ones if x' is normal, not a NaN or INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/isnormald2.h> + +vector unsigned long long +isnormald2 (vector double x) +{ + return _isnormald2(x); +} diff --git a/Extras/simdmathlibrary/spu/isnormalf4.c b/Extras/simdmathlibrary/common/isnormalf4.c similarity index 91% rename from Extras/simdmathlibrary/spu/isnormalf4.c rename to Extras/simdmathlibrary/common/isnormalf4.c index a49fb695d..1271313fa --- a/Extras/simdmathlibrary/spu/isnormalf4.c +++ b/Extras/simdmathlibrary/common/isnormalf4.c @@ -27,12 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isnormalf4.h> vector unsigned int isnormalf4 (vector float x) { - // NaN, INF not supported on SPU; normal unless zero - return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000)); + return _isnormalf4(x); } diff --git a/Extras/simdmathlibrary/common/isunorderedd2.c b/Extras/simdmathlibrary/common/isunorderedd2.c new file mode 100644 index 000000000..ee135f3e4 --- /dev/null +++ b/Extras/simdmathlibrary/common/isunorderedd2.c @@ -0,0 +1,36 @@ +/* isunorderedd2 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/isunorderedd2.h> + +vector unsigned long long +isunorderedd2 (vector double x, vector double y) +{ + return _isunorderedd2(x, y); +} diff --git a/Extras/simdmathlibrary/spu/isunorderedf4.c b/Extras/simdmathlibrary/common/isunorderedf4.c similarity index 91% rename from Extras/simdmathlibrary/spu/isunorderedf4.c rename to Extras/simdmathlibrary/common/isunorderedf4.c index e09df12e5..d216f4e7b --- a/Extras/simdmathlibrary/spu/isunorderedf4.c +++ b/Extras/simdmathlibrary/common/isunorderedf4.c @@ -27,15 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/isunorderedf4.h> vector unsigned int isunorderedf4 (vector float x, vector float y) { - (void)x; - (void)y; - - // NaN not supported on SPU, result always zero - return spu_splats((unsigned int)0x00000000); + return _isunorderedf4(x, y); } diff --git a/Extras/simdmathlibrary/common/ldexpd2.c b/Extras/simdmathlibrary/common/ldexpd2.c new file mode 100644 index 000000000..a5b35433d --- /dev/null +++ b/Extras/simdmathlibrary/common/ldexpd2.c @@ -0,0 +1,38 @@ +/* ldexpd2 - Multiply Double by 2 Raised to its Power + For large elements of ex (overflow), returns HUGE_VAL + For small elements of ex (underflow), returns 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/ldexpd2.h> + +vector double +ldexpd2(vector double x, vector signed long long ex) +{ + return _ldexpd2(x, ex); +}
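Per slot, ldexpd2 is the C ldexp scaling x * 2^ex, saturating as described in the header comment. A scalar sketch of the contract (plain C99):

#include <math.h>
#include <stdio.h>

int main(void)
{
  printf("%g\n", ldexp(0.75, 5));    /* 24, i.e. 0.75 * 2^5 */
  printf("%g\n", ldexp(1.0, 2000));  /* overflow: HUGE_VAL (prints inf) */
  printf("%g\n", ldexp(1.0, -2000)); /* underflow: 0 */
  return 0;
}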
diff --git a/Extras/simdmathlibrary/common/ldexpf4.c b/Extras/simdmathlibrary/common/ldexpf4.c new file mode 100644 index 000000000..e1680795d --- /dev/null +++ b/Extras/simdmathlibrary/common/ldexpf4.c @@ -0,0 +1,36 @@ +/* ldexpf4 - Multiply Float by 2 Raised to its Power + For large elements of exp (overflow), returns HUGE_VALF + For small elements of exp (underflow), returns 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/ldexpf4.h> + +vector float +ldexpf4 (vector float x, vector signed int exp) +{ + return _ldexpf4( x, exp ); +} diff --git a/Extras/simdmathlibrary/spu/llabsi2.c b/Extras/simdmathlibrary/common/llabsi2.c similarity index 79% rename from Extras/simdmathlibrary/spu/llabsi2.c rename to Extras/simdmathlibrary/common/llabsi2.c index 14297f3cd..454a3e0b0 --- a/Extras/simdmathlibrary/spu/llabsi2.c +++ b/Extras/simdmathlibrary/common/llabsi2.c @@ -27,19 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <spu_intrinsics.h> +#include <simdmath/llabsi2.h> vector signed long long llabsi2 (vector signed long long in) { - vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); - sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); - - vec_uint4 add_1 = ((vec_uint4){0,1,0,1}); - vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in); - res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4)); - res = spu_sel( (vec_uint4)in, res, sign); - - return ((vec_llong2)(res)); + return _llabsi2(in); } diff --git a/Extras/simdmathlibrary/common/lldivi2.c b/Extras/simdmathlibrary/common/lldivi2.c new file mode 100644 index 000000000..7e14fded3 --- /dev/null +++ b/Extras/simdmathlibrary/common/lldivi2.c @@ -0,0 +1,36 @@ +/* lldivi2 - for each of two signed long long slots, divide numer by denom, returning quotient and remainder + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/lldivi2.h> + +lldivi2_t +lldivi2 (vector signed long long numer, vector signed long long denom) +{ + return _lldivi2(numer, denom); +} diff --git a/Extras/simdmathlibrary/common/lldivu2.c b/Extras/simdmathlibrary/common/lldivu2.c new file mode 100644 index 000000000..5b2fd89b0 --- /dev/null +++ b/Extras/simdmathlibrary/common/lldivu2.c @@ -0,0 +1,36 @@ +/* lldivu2 - for each of two unsigned long long slots, divide numer by denom, returning quotient and remainder + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/lldivu2.h> + +lldivu2_t +lldivu2 (vector unsigned long long numer, vector unsigned long long denom) +{ + return _lldivu2(numer, denom); +} diff --git a/Extras/simdmathlibrary/common/llrintd2.c b/Extras/simdmathlibrary/common/llrintd2.c new file mode 100644 index 000000000..3d4456110 --- /dev/null +++ b/Extras/simdmathlibrary/common/llrintd2.c @@ -0,0 +1,37 @@ +/* llrintd2 - rounds two doubles to the two nearest 64-bit integers, + consistent with the current rounding mode. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/llrintd2.h> + +vector signed long long +llrintd2 (vector double in) +{ + return _llrintd2(in); +} diff --git a/Extras/simdmathlibrary/common/llrintf4.c b/Extras/simdmathlibrary/common/llrintf4.c new file mode 100644 index 000000000..96f956a2b --- /dev/null +++ b/Extras/simdmathlibrary/common/llrintf4.c @@ -0,0 +1,37 @@ +/* llrintf4 - rounds four floats to the four nearest 64-bit integers. + On SPU the rounding mode for floats is always towards 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/llrintf4.h> + +llroundf4_t +llrintf4 (vector float in) +{ + return _llrintf4(in); +} diff --git a/Extras/simdmathlibrary/common/llroundd2.c b/Extras/simdmathlibrary/common/llroundd2.c new file mode 100644 index 000000000..680408e41 --- /dev/null +++ b/Extras/simdmathlibrary/common/llroundd2.c @@ -0,0 +1,37 @@ +/* llroundd2 - rounds two doubles to the two nearest 64-bit integers; + 0.5 is rounded away from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/llroundd2.h> + +vector signed long long
+llroundd2 (vector double in) +{ + return _llroundd2(in); +}
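llroundd2 rounds halfway cases away from zero, whereas llrintd2 above honours the current rounding mode (round-to-nearest-even by default). A scalar sketch of the difference (plain C99):

#include <math.h>
#include <stdio.h>

int main(void)
{
  printf("%lld\n", llround(2.5));  /* 3: halfway rounds away from zero */
  printf("%lld\n", llround(-2.5)); /* -3 */
  printf("%lld\n", llrint(2.5));   /* 2 under default round-to-nearest-even */
  return 0;
}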
diff --git a/Extras/simdmathlibrary/common/llroundf4.c b/Extras/simdmathlibrary/common/llroundf4.c new file mode 100644 index 000000000..db00e9c00 --- /dev/null +++ b/Extras/simdmathlibrary/common/llroundf4.c @@ -0,0 +1,37 @@ +/* llroundf4 - rounds four floats to the four nearest 64-bit integers; + 0.5 is rounded away from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/llroundf4.h> + +llroundf4_t +llroundf4 (vector float in) +{ + return _llroundf4(in); +} diff --git a/Extras/simdmathlibrary/common/log10f4.c b/Extras/simdmathlibrary/common/log10f4.c new file mode 100644 index 000000000..a246f1fd1 --- /dev/null +++ b/Extras/simdmathlibrary/common/log10f4.c @@ -0,0 +1,36 @@ +/* log10f4 - for each element of vector x, return the base-10 logarithm of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/log10f4.h> + +vector float +log10f4 (vector float x) +{ + return _log10f4( x ); +} diff --git a/Extras/simdmathlibrary/common/log1pf4.c b/Extras/simdmathlibrary/common/log1pf4.c new file mode 100644 index 000000000..5571df07f --- /dev/null +++ b/Extras/simdmathlibrary/common/log1pf4.c @@ -0,0 +1,36 @@ +/* log1pf4 - for each element of vector x, return the natural logarithm of 1 + x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/log1pf4.h> + +vector float +log1pf4 (vector float x) +{ + return _log1pf4( x ); +} diff --git a/Extras/simdmathlibrary/common/log2f4.c b/Extras/simdmathlibrary/common/log2f4.c new file mode 100644 index 000000000..3c9d93a39 --- /dev/null +++ b/Extras/simdmathlibrary/common/log2f4.c @@ -0,0 +1,36 @@ +/* log2f4 - for each element of vector x, return the base-2 logarithm of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/log2f4.h> + +vector float +log2f4 (vector float x) +{ + return _log2f4( x ); +} diff --git a/Extras/simdmathlibrary/common/logbd2.c b/Extras/simdmathlibrary/common/logbd2.c new file mode 100644 index 000000000..0ea5b7d1d --- /dev/null +++ b/Extras/simdmathlibrary/common/logbd2.c @@ -0,0 +1,36 @@ +/* logbd2 - for each element of vector x, return the exponent of normalized double x' as a floating-point value + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/logbd2.h> + +vector double +logbd2 (vector double x) +{ + return _logbd2(x); +} diff --git a/Extras/simdmathlibrary/common/logbf4.c b/Extras/simdmathlibrary/common/logbf4.c new file mode 100644 index 000000000..19ab3a1ea --- /dev/null +++ b/Extras/simdmathlibrary/common/logbf4.c @@ -0,0 +1,36 @@ +/* logbf4 - for each element of vector x, return the exponent of normalized float x' as a floating-point value + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +logbf4 (vector float x) +{ + return _logbf4( x ); +} diff --git a/Extras/simdmathlibrary/common/logf4.c b/Extras/simdmathlibrary/common/logf4.c new file mode 100644 index 000000000..fc2372e4d --- /dev/null +++ b/Extras/simdmathlibrary/common/logf4.c @@ -0,0 +1,36 @@ +/* logf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +logf4 (vector float x) +{ + return _logf4( x ); +} diff --git a/Extras/simdmathlibrary/common/modfd2.c b/Extras/simdmathlibrary/common/modfd2.c new file mode 100644 index 000000000..52dcb3425 --- /dev/null +++ b/Extras/simdmathlibrary/common/modfd2.c @@ -0,0 +1,36 @@ +/* modfd2 - for each of two double slots, compute fractional and integral parts. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +modfd2 (vector double x, vector double *iptr) +{ + return _modfd2(x, iptr); +} diff --git a/Extras/simdmathlibrary/common/modff4.c b/Extras/simdmathlibrary/common/modff4.c new file mode 100644 index 000000000..b460ccfb3 --- /dev/null +++ b/Extras/simdmathlibrary/common/modff4.c @@ -0,0 +1,36 @@ +/* modff4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
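/* Usage sketch for the modff4 wrapper that follows (an editorial addition,
   not part of the patch). modff4 is the four-wide analogue of scalar
   modf(), splitting each slot into fractional and integral parts. */
#include <simdmath.h>

static vector float
fractional_parts (vector float x)
{
  vector float whole;
  /* For x = { 1.75f, -2.5f, 0.5f, 3.0f }, whole receives
     { 1.0f, -2.0f, 0.0f, 3.0f } and the return value is
     { 0.75f, -0.5f, 0.5f, 0.0f }. */
  return modff4 (x, &whole);
}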
+ */ + +#include + +vector float +modff4 (vector float x, vector float *iptr) +{ + return _modff4( x, iptr ); +} diff --git a/Extras/simdmathlibrary/common/nearbyintd2.c b/Extras/simdmathlibrary/common/nearbyintd2.c new file mode 100644 index 000000000..354d7f08e --- /dev/null +++ b/Extras/simdmathlibrary/common/nearbyintd2.c @@ -0,0 +1,37 @@ +/* nearbyintd2 - Round the input to the nearest integer according to + the current rounding mode without raising an inexact exception. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +nearbyintd2(vector double in) +{ + return _nearbyintd2(in); +} diff --git a/Extras/simdmathlibrary/spu/nearbyintf4.c b/Extras/simdmathlibrary/common/nearbyintf4.c similarity index 80% rename from Extras/simdmathlibrary/spu/nearbyintf4.c rename to Extras/simdmathlibrary/common/nearbyintf4.c index 9770a8ec6..56256501d 100644 --- a/Extras/simdmathlibrary/spu/nearbyintf4.c +++ b/Extras/simdmathlibrary/common/nearbyintf4.c @@ -30,21 +30,10 @@ POSSIBILITY OF SUCH DAMAGE. */ +#include -#include -#include - -vector float nearbyintf4(vector float x) +vector float +nearbyintf4(vector float x) { - vector signed int xi; - vector unsigned int inrange; - - // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). - // However, no truncation needed if exponent > 22. - - inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); - - xi = spu_convts( x, 0 ); - - return spu_sel( x, spu_convtf( xi, 0 ), inrange ); + return _nearbyintf4(x); } diff --git a/Extras/simdmathlibrary/spu/negated2.c b/Extras/simdmathlibrary/common/negated2.c similarity index 92% rename from Extras/simdmathlibrary/spu/negated2.c rename to Extras/simdmathlibrary/common/negated2.c index 801dddfa8..66847d094 100644 --- a/Extras/simdmathlibrary/spu/negated2.c +++ b/Extras/simdmathlibrary/common/negated2.c @@ -27,12 +27,11 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include +#include vector double negated2 (vector double x) { - return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + return _negated2(x); } diff --git a/Extras/simdmathlibrary/ppu/negatef4.c b/Extras/simdmathlibrary/common/negatef4.c similarity index 91% rename from Extras/simdmathlibrary/ppu/negatef4.c rename to Extras/simdmathlibrary/common/negatef4.c index 74447c90e..185666368 100644 --- a/Extras/simdmathlibrary/ppu/negatef4.c +++ b/Extras/simdmathlibrary/common/negatef4.c @@ -27,14 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include - -#include "common-types.h" +#include vector float negatef4 (vector float x) { - return (vector float)vec_xor( (vector unsigned int)x, vec_splatsu4(0x80000000) ); + return _negatef4( x ); } - diff --git a/Extras/simdmathlibrary/ppu/negatei4.c b/Extras/simdmathlibrary/common/negatei4.c similarity index 92% rename from Extras/simdmathlibrary/ppu/negatei4.c rename to Extras/simdmathlibrary/common/negatei4.c index 7d538053b..622fb7a4b 100644 --- a/Extras/simdmathlibrary/ppu/negatei4.c +++ b/Extras/simdmathlibrary/common/negatei4.c @@ -27,13 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include vector signed int negatei4 (vector signed int x) { - vector signed int zero = (vector signed int){0, 0, 0, 0}; - return vec_sub (zero, x); + return _negatei4( x ); } - diff --git a/Extras/simdmathlibrary/spu/negatell2.c b/Extras/simdmathlibrary/common/negatell2.c similarity index 83% rename from Extras/simdmathlibrary/spu/negatell2.c rename to Extras/simdmathlibrary/common/negatell2.c index f3fdb5603..36fc706da 100644 --- a/Extras/simdmathlibrary/spu/negatell2.c +++ b/Extras/simdmathlibrary/common/negatell2.c @@ -27,17 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include vector signed long long negatell2 (vector signed long long x) { - vector signed int zero = (vector signed int){0,0,0,0}; - vector signed int borrow; - - borrow = spu_genb(zero, (vec_int4)x); - borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0})); - return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow); + return _negatell2(x); } - diff --git a/Extras/simdmathlibrary/common/nextafterd2.c b/Extras/simdmathlibrary/common/nextafterd2.c new file mode 100644 index 000000000..8b7f48c16 --- /dev/null +++ b/Extras/simdmathlibrary/common/nextafterd2.c @@ -0,0 +1,36 @@ +/* nextafterd2 - find next representable floating-point value towards 2nd param. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +nextafterd2 (vector double xx, vector double yy) +{ + return _nextafterd2(xx, yy); +} diff --git a/Extras/simdmathlibrary/common/nextafterf4.c b/Extras/simdmathlibrary/common/nextafterf4.c new file mode 100644 index 000000000..9742db409 --- /dev/null +++ b/Extras/simdmathlibrary/common/nextafterf4.c @@ -0,0 +1,38 @@ +/* nextafterf4 - for each of four float slots, + return the next representable value after x in the direction of y; + if x is equal to y, the result is y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +nextafterf4(vector float x, vector float y) +{ + return _nextafterf4(x, y); +} diff --git a/Extras/simdmathlibrary/common/powf4.c b/Extras/simdmathlibrary/common/powf4.c new file mode 100644 index 000000000..00c3a82eb --- /dev/null +++ b/Extras/simdmathlibrary/common/powf4.c @@ -0,0 +1,36 @@ +/* powf4 - raise x to the power y, for each of four float slots. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved.
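/* Usage sketch for nextafterf4 above (an editorial addition, not part of
   the patch): step each slot one representable float toward y. */
#include <math.h>     /* HUGE_VALF */
#include <simdmath.h>

static vector float
next_up (vector float x)
{
  /* Moving toward +infinity yields the next larger representable float
     in every slot; a slot already equal to the target stays put. */
  vector float inf = (vector float){ HUGE_VALF, HUGE_VALF,
                                     HUGE_VALF, HUGE_VALF };
  return nextafterf4 (x, inf);
}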
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +powf4 (vector float x, vector float y) +{ + return _powf4( x, y ); +} diff --git a/Extras/simdmathlibrary/common/recipd2.c b/Extras/simdmathlibrary/common/recipd2.c new file mode 100644 index 000000000..d84d01b98 --- /dev/null +++ b/Extras/simdmathlibrary/common/recipd2.c @@ -0,0 +1,36 @@ +/* recipd2 - for each of two double slots, compute reciprocal. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
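/* Usage sketch for the recipd2 wrapper that follows (an editorial
   addition, not part of the patch): elementwise reciprocal of the two
   double slots, on targets with vector double. */
#include <simdmath.h>

static vector double
reciprocal_example (void)
{
  vector double y = (vector double){ 4.0, 8.0 };
  /* Returns { 0.25, 0.125 }; use divd2 when a full x/y division
     is wanted instead. */
  return recipd2 (y);
}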
+ */ + +#include + +vector double +recipd2 (vector double x) +{ + return _recipd2(x); +} diff --git a/Extras/simdmathlibrary/common/recipf4.c b/Extras/simdmathlibrary/common/recipf4.c new file mode 100644 index 000000000..9743efe0c --- /dev/null +++ b/Extras/simdmathlibrary/common/recipf4.c @@ -0,0 +1,36 @@ +/* recipf4 - for each of four float slots, compute reciprocal. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +recipf4 (vector float x) +{ + return _recipf4( x ); +} diff --git a/Extras/simdmathlibrary/common/remainderd2.c b/Extras/simdmathlibrary/common/remainderd2.c new file mode 100644 index 000000000..082e98860 --- /dev/null +++ b/Extras/simdmathlibrary/common/remainderd2.c @@ -0,0 +1,37 @@ +/* A vector double is returned that contains the remainder xi REM yi, + for the corresponding elements of vector double x and vector double y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +remainderd2(vector double x, vector double yy) +{ + return _remainderd2(x, yy); +} diff --git a/Extras/simdmathlibrary/common/remainderf4.c b/Extras/simdmathlibrary/common/remainderf4.c new file mode 100644 index 000000000..f65358798 --- /dev/null +++ b/Extras/simdmathlibrary/common/remainderf4.c @@ -0,0 +1,36 @@ +/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +remainderf4 (vector float x, vector float y) +{ + return _remainderf4(x, y); +} diff --git a/Extras/simdmathlibrary/common/remquod2.c b/Extras/simdmathlibrary/common/remquod2.c new file mode 100644 index 000000000..c06a4b4cd --- /dev/null +++ b/Extras/simdmathlibrary/common/remquod2.c @@ -0,0 +1,36 @@ +/* remquod2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +remquod2(vector double x, vector double yy, vector signed long long *quo) +{ + return _remquod2(x, yy, quo); +} diff --git a/Extras/simdmathlibrary/common/remquof4.c b/Extras/simdmathlibrary/common/remquof4.c new file mode 100644 index 000000000..2429f9524 --- /dev/null +++ b/Extras/simdmathlibrary/common/remquof4.c @@ -0,0 +1,36 @@ +/* remquof4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +remquof4(vector float x, vector float y, vector signed int *quo) +{ + return _remquof4(x, y, quo); +} diff --git a/Extras/simdmathlibrary/common/rintd2.c b/Extras/simdmathlibrary/common/rintd2.c new file mode 100644 index 000000000..a2e9435a2 --- /dev/null +++ b/Extras/simdmathlibrary/common/rintd2.c @@ -0,0 +1,37 @@ +/* rintd2 - Round the input to the nearest integer according to + the current rounding mode. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
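/* Usage sketch for remquof4 above (an editorial addition, not part of
   the patch): remainder plus low-order quotient bits, as with scalar
   remquo(). The pi/4 constant and octant use are illustrative only. */
#include <simdmath.h>

static vector float
reduce_to_octant (vector float angle, vector signed int *octant)
{
  vector float pio4 = (vector float){ 0.78539816f, 0.78539816f,
                                      0.78539816f, 0.78539816f };
  /* Each slot of *octant receives the low bits of round(angle/(pi/4)),
     the usual first step of trigonometric argument reduction. */
  return remquof4 (angle, pio4, octant);
}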
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +rintd2(vector double in) +{ + return _rintd2(in); +} diff --git a/Extras/simdmathlibrary/spu/rintf4.c b/Extras/simdmathlibrary/common/rintf4.c similarity index 80% rename from Extras/simdmathlibrary/spu/rintf4.c rename to Extras/simdmathlibrary/common/rintf4.c index 57ac9c64e..c8069dc91 100644 --- a/Extras/simdmathlibrary/spu/rintf4.c +++ b/Extras/simdmathlibrary/common/rintf4.c @@ -29,20 +29,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include -vector float rintf4(vector float x) +vector float +rintf4(vector float x) { - vector signed int xi; - vector unsigned int inrange; - - // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). - // However, no truncation needed if exponent > 22. - - inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); - - xi = spu_convts( x, 0 ); - - return spu_sel( x, spu_convtf( xi, 0 ), inrange ); + return _rintf4(x); } diff --git a/Extras/simdmathlibrary/common/roundd2.c b/Extras/simdmathlibrary/common/roundd2.c new file mode 100644 index 000000000..fadc41b83 --- /dev/null +++ b/Extras/simdmathlibrary/common/roundd2.c @@ -0,0 +1,37 @@ +/* roundd2 - rounds to the nearest integer value in floating point format. + 0.5 will be rounded away from 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +roundd2 (vector double in) +{ + return _roundd2(in); +} diff --git a/Extras/simdmathlibrary/common/roundf4.c b/Extras/simdmathlibrary/common/roundf4.c new file mode 100644 index 000000000..65277151d --- /dev/null +++ b/Extras/simdmathlibrary/common/roundf4.c @@ -0,0 +1,37 @@ +/* roundf4 - for each of four float slots, round to the nearest integer, + halfway cases are rounded away from zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +roundf4(vector float in) +{ + return _roundf4(in); +} diff --git a/Extras/simdmathlibrary/common/rsqrtd2.c b/Extras/simdmathlibrary/common/rsqrtd2.c new file mode 100644 index 000000000..bbd58c468 --- /dev/null +++ b/Extras/simdmathlibrary/common/rsqrtd2.c @@ -0,0 +1,36 @@ +/* rsqrtd2 - for each of two double slots, compute reciprocal square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +rsqrtd2 (vector double x) +{ + return _rsqrtd2(x); +} diff --git a/Extras/simdmathlibrary/common/rsqrtf4.c b/Extras/simdmathlibrary/common/rsqrtf4.c new file mode 100644 index 000000000..fa0dec3ab --- /dev/null +++ b/Extras/simdmathlibrary/common/rsqrtf4.c @@ -0,0 +1,36 @@ +/* rsqrtf4 - for each of four float slots, compute reciprocal square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.
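/* Usage sketch for the rsqrtf4 wrapper that follows (an editorial
   addition, not part of the patch): reciprocal square root is the usual
   building block for normalizing packed vectors without a divide. */
#include <simdmath.h>

static vector float
normalization_scale (vector float length_squared)
{
  /* Multiplying a vector's components by 1/sqrt(|v|^2) normalizes it;
     rsqrtf4 computes that factor for four lengths at once. */
  return rsqrtf4 (length_squared);
}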
+ */ + +#include + +vector float +rsqrtf4 (vector float x) +{ + return _rsqrtf4( x ); +} diff --git a/Extras/simdmathlibrary/common/scalbllnd2.c b/Extras/simdmathlibrary/common/scalbllnd2.c new file mode 100644 index 000000000..aad9e5cda --- /dev/null +++ b/Extras/simdmathlibrary/common/scalbllnd2.c @@ -0,0 +1,38 @@ +/* scalbllnd2 - Multiply Double by 2 Raised to its Power + For large elements of ex (overflow), returns HUGE_VALF + For small elements of ex (underflow), returns 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector double +scalbllnd2(vector double x, vector signed long long ex) +{ + return _scalbllnd2(x, ex); +} diff --git a/Extras/simdmathlibrary/common/scalbnf4.c b/Extras/simdmathlibrary/common/scalbnf4.c new file mode 100644 index 000000000..49db439d6 --- /dev/null +++ b/Extras/simdmathlibrary/common/scalbnf4.c @@ -0,0 +1,38 @@ +/* scalbnf4 computes x * 2^exp. This function is computed without + the assistance of any floating point operations and as such does + not set any floating point exceptions. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +vector float +scalbnf4(vector float x, vector signed int n) +{ + return _scalbnf4(x, n); +} diff --git a/Extras/simdmathlibrary/spu/signbitd2.c b/Extras/simdmathlibrary/common/signbitd2.c similarity index 83% rename from Extras/simdmathlibrary/spu/signbitd2.c rename to Extras/simdmathlibrary/common/signbitd2.c index 7978aa4a3..facdb6cb9 100644 --- a/Extras/simdmathlibrary/spu/signbitd2.c +++ b/Extras/simdmathlibrary/common/signbitd2.c @@ -27,17 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include -vector unsigned long long signbitd2 (vector double x) +vector unsigned long long +signbitd2 (vector double x) { - vec_ullong2 cmp; - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - - cmp = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)x ); - cmp = spu_shuffle( cmp, cmp, even ); - - return cmp; + return _signbitd2(x); } - diff --git a/Extras/simdmathlibrary/spu/signbitf4.c b/Extras/simdmathlibrary/common/signbitf4.c similarity index 94% rename from Extras/simdmathlibrary/spu/signbitf4.c rename to Extras/simdmathlibrary/common/signbitf4.c index 6ab9103a8..811879ecd 100644 --- a/Extras/simdmathlibrary/spu/signbitf4.c +++ b/Extras/simdmathlibrary/common/signbitf4.c @@ -27,12 +27,10 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#include vector unsigned int signbitf4 (vector float x) { - return spu_cmpgt( spu_splats(0), (vec_int4)x ); + return _signbitf4(x); } - diff --git a/Extras/simdmathlibrary/simdmath.h b/Extras/simdmathlibrary/common/simdmath.h similarity index 62% rename from Extras/simdmathlibrary/simdmath.h rename to Extras/simdmathlibrary/common/simdmath.h index c50b241f8..e953d8dff 100644 --- a/Extras/simdmathlibrary/simdmath.h +++ b/Extras/simdmathlibrary/common/simdmath.h @@ -29,41 +29,43 @@ -#ifndef ___SIMD_MATH_H____ -#define ___SIMD_MATH_H____ +#ifndef ___SIMD_MATH_H___ +#define ___SIMD_MATH_H___ -#define SIMD_MATH_HAVE_VECTOR_f4 0 -#define SIMD_MATH_HAVE_VECTOR_i4 0 -#define SIMD_MATH_HAVE_VECTOR_d2 0 -#define SIMD_MATH_HAVE_VECTOR_ll2 0 +#define __SIMD_MATH_HAVE_VECTOR_f4 0 +#define __SIMD_MATH_HAVE_VECTOR_i4 0 +#define __SIMD_MATH_HAVE_VECTOR_d2 0 +#define __SIMD_MATH_HAVE_VECTOR_ll2 0 #ifdef __SPU__ /* SPU has vector float, vector double, vector {un,}signed long long, and vector {un,signed} int. 
*/ -#undef SIMD_MATH_HAVE_VECTOR_f4 -#define SIMD_MATH_HAVE_VECTOR_f4 1 +#include -#undef SIMD_MATH_HAVE_VECTOR_i4 -#define SIMD_MATH_HAVE_VECTOR_i4 1 +#undef __SIMD_MATH_HAVE_VECTOR_f4 +#define __SIMD_MATH_HAVE_VECTOR_f4 1 -#undef SIMD_MATH_HAVE_VECTOR_d2 -#define SIMD_MATH_HAVE_VECTOR_d2 1 +#undef __SIMD_MATH_HAVE_VECTOR_i4 +#define __SIMD_MATH_HAVE_VECTOR_i4 1 -#undef SIMD_MATH_HAVE_VECTOR_ll2 -#define SIMD_MATH_HAVE_VECTOR_ll2 1 +#undef __SIMD_MATH_HAVE_VECTOR_d2 +#define __SIMD_MATH_HAVE_VECTOR_d2 1 + +#undef __SIMD_MATH_HAVE_VECTOR_ll2 +#define __SIMD_MATH_HAVE_VECTOR_ll2 1 #elif defined(__ALTIVEC__) #include /* PPU has vector float, and vector int. */ -#undef SIMD_MATH_HAVE_VECTOR_f4 -#define SIMD_MATH_HAVE_VECTOR_f4 1 +#undef __SIMD_MATH_HAVE_VECTOR_f4 +#define __SIMD_MATH_HAVE_VECTOR_f4 1 -#undef SIMD_MATH_HAVE_VECTOR_i4 -#define SIMD_MATH_HAVE_VECTOR_i4 1 +#undef __SIMD_MATH_HAVE_VECTOR_i4 +#define __SIMD_MATH_HAVE_VECTOR_i4 1 #else @@ -79,35 +81,35 @@ extern "C" { /* Types */ -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 typedef struct divi4_s { vector signed int quot; vector signed int rem; } divi4_t; #endif -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 typedef struct divu4_s { vector unsigned int quot; vector unsigned int rem; } divu4_t; #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 typedef struct lldivi2_s { vector signed long long quot; vector signed long long rem; } lldivi2_t; #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 typedef struct lldivu2_s { vector unsigned long long quot; vector unsigned long long rem; } lldivu2_t; #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_ll2 typedef struct llroundf4_s { vector signed long long vll[2]; } llroundf4_t; @@ -115,606 +117,611 @@ typedef struct llroundf4_s { /* integer divide */ -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 divi4_t divi4 (vector signed int, vector signed int); #endif -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 divu4_t divu4 (vector unsigned int, vector unsigned int); #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 lldivi2_t lldivi2 (vector signed long long, vector signed long long); #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 lldivu2_t lldivu2 (vector unsigned long long, vector unsigned long long); #endif /* abs value */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fabsf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fabsd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 vector signed int absi4 (vector signed int); #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long llabsi2 (vector signed long long); #endif /* negate value */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float negatef4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double negated2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_i4 vector signed int negatei4 (vector signed int); #endif -#if SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long negatell2 (vector signed long long); #endif /* trunc */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float truncf4 (vector float); #endif -#if 
SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double trund2 (vector double); #endif /* floor */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float floorf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double floord2 (vector double); #endif /* ceil */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float ceilf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double ceild2 (vector double); #endif /* exp */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float expf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double expd2 (vector double); #endif /* exp */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float exp2f4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double exp2d2 (vector double); #endif /* expm1 */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float expm1f4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double expm1d2 (vector double); #endif /* log */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float logf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double logd2 (vector double); #endif /* log10 */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float log10f4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double log10d2 (vector double); #endif /* log1p */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float log1pf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double log1pd2 (vector double); #endif +/* pow */ +#if __SIMD_MATH_HAVE_VECTOR_f4 +vector float powf4 (vector float, vector float); +#endif + /* fma */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fmaf4 (vector float, vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fmad2 (vector double, vector double, vector double); #endif /* fmax */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fmaxf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fmaxd2 (vector double, vector double); #endif /* fmin */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fminf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fmind2 (vector double, vector double); #endif /* fdim */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fdimf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fdimd2 (vector double, vector double); #endif /* fmod */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float fmodf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fmodd2 (vector double, vector double); #endif /* log2 */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float log2f4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double log2d2 (vector double); #endif /* logb */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if 
__SIMD_MATH_HAVE_VECTOR_f4 vector float logbf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double logbd2 (vector double); #endif /* ilogb */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector signed int ilogbf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long ilogbd2 (vector double); #endif /* modf */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float modff4 (vector float, vector float *); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double modfd2 (vector double, vector double *); #endif /* sqrt */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float sqrtf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double sqrtd2 (vector double); #endif /* hypot */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float hypotf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double hypotd2 (vector double, vector double); #endif /* cbrtf4 */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float cbrtf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double cbrtd2 (vector double); #endif /* sin */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float sinf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double sind2 (vector double); #endif /* asin */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float asinf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double asind2 (vector double); #endif /* divide */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float divf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double divd2 (vector double, vector double); #endif /* remainder */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float remainderf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double remainderd2 (vector double, vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector float remquof4(vector float x, vector float y, vector signed int *quo); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector double remquod2(vector double x, vector double y, vector signed long long *quo); #endif /* copysign */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float copysignf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double copysignd2 (vector double, vector double); #endif /* cos */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float cosf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double cosd2 (vector double); #endif /* acos */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float acosf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector 
double acosd2 (vector double); #endif /* atan */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float atanf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double atand2 (vector double); #endif /* atan2 */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float atan2f4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double atan2d2 (vector double, vector double); #endif /* tan */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float tanf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double tand2 (vector double); #endif /* sincos */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 void sincosf4 (vector float, vector float *, vector float *); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 void sincosd2 (vector double, vector double *, vector double *); #endif /* recip */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float recipf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double recipd2 (vector double); #endif /* rsqrt */ -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float rsqrtf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double rsqrtd2 (vector double); #endif /* frexp */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector float frexpf4 (vector float, vector signed int *); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector double frexpd2 (vector double, vector signed long long *); #endif /* ldexp */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector float ldexpf4 (vector float, vector signed int ); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector double ldexpd2 (vector double, vector signed long long ); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector float scalbnf4(vector float x, vector signed int n); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector double scalbllnd2 (vector double, vector signed long long ); #endif /* isnan */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isnanf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isnand2 (vector double); #endif /* isinf */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isinff4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isinfd2 (vector double); #endif /* isfinite */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isfinitef4 (vector float); #endif -#if 
SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isfinited2 (vector double); #endif /* isnormal */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isnormalf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isnormald2 (vector double); #endif /* isunordered */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isunorderedf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isunorderedd2 (vector double, vector double); #endif /* is0denorm */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int is0denormf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long is0denormd2 (vector double); #endif /* signbit */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int signbitf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long signbitd2 (vector double); #endif /* isequal */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isequalf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isequald2 (vector double, vector double); #endif /* islessgreater */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int islessgreaterf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long islessgreaterd2 (vector double, vector double); #endif /* isless */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int islessf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long islessd2 (vector double, vector double); #endif /* isgreater */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isgreaterf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isgreaterd2 (vector double, vector double); #endif /* islessequal */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int islessequalf4 (vector float, vector float); #endif -#if 
SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long islessequald2 (vector double, vector double); #endif /* isgreaterequal */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector unsigned int isgreaterequalf4 (vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector unsigned long long isgreaterequald2 (vector double, vector double); #endif /* fpclassify */ -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector signed int fpclassifyf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long fpclassifyd2 (vector double); #endif /* round */ -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long llroundd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_ll2 llroundf4_t llroundf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_ll2 llroundf4_t llrintf4 (vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 && SIMD_MATH_HAVE_VECTOR_ll2 +#if __SIMD_MATH_HAVE_VECTOR_d2 && __SIMD_MATH_HAVE_VECTOR_ll2 vector signed long long llrintd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float roundf4(vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector signed int iroundf4(vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float rintf4(vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 && SIMD_MATH_HAVE_VECTOR_i4 +#if __SIMD_MATH_HAVE_VECTOR_f4 && __SIMD_MATH_HAVE_VECTOR_i4 vector signed int irintf4(vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double nextafterd2 (vector double, vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float nextafterf4(vector float, vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double nearbyintd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_f4 +#if __SIMD_MATH_HAVE_VECTOR_f4 vector float nearbyintf4(vector float); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double truncd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double roundd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double rintd2 (vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double ceild2(vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double floord2(vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double fmodd2(vector double, vector double); #endif -#if SIMD_MATH_HAVE_VECTOR_d2 +#if __SIMD_MATH_HAVE_VECTOR_d2 vector double remainderd2(vector double, vector double); #endif diff --git a/Extras/simdmathlibrary/spu/sincos_c.h 
b/Extras/simdmathlibrary/common/simdmath/_sincos.h similarity index 59% rename from Extras/simdmathlibrary/spu/sincos_c.h rename to Extras/simdmathlibrary/common/simdmath/_sincos.h index d29fc08f0..a604ae1e3 100644 --- a/Extras/simdmathlibrary/spu/sincos_c.h +++ b/Extras/simdmathlibrary/common/simdmath/_sincos.h @@ -27,52 +27,52 @@ POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __SINCOS_C2__ -#define __SINCOS_C2__ +#ifndef ___SIMD_MATH__SINCOS_H___ +#define ___SIMD_MATH__SINCOS_H___ // // Common constants used to evaluate sind2/cosd2/tand2 // -#define _SINCOS_CC0D 0.00000000206374484196 -#define _SINCOS_CC1D -0.00000027555365134677 -#define _SINCOS_CC2D 0.00002480157946764225 -#define _SINCOS_CC3D -0.00138888888730525966 -#define _SINCOS_CC4D 0.04166666666651986722 -#define _SINCOS_CC5D -0.49999999999999547304 +#define __SINCOSD_CC0 0.00000000206374484196 +#define __SINCOSD_CC1 -0.00000027555365134677 +#define __SINCOSD_CC2 0.00002480157946764225 +#define __SINCOSD_CC3 -0.00138888888730525966 +#define __SINCOSD_CC4 0.04166666666651986722 +#define __SINCOSD_CC5 -0.49999999999999547304 -#define _SINCOS_SC0D 0.00000000015893606014 -#define _SINCOS_SC1D -0.00000002505069049138 -#define _SINCOS_SC2D 0.00000275573131527032 -#define _SINCOS_SC3D -0.00019841269827816117 -#define _SINCOS_SC4D 0.00833333333331908278 -#define _SINCOS_SC5D -0.16666666666666612594 +#define __SINCOSD_SC0 0.00000000015893606014 +#define __SINCOSD_SC1 -0.00000002505069049138 +#define __SINCOSD_SC2 0.00000275573131527032 +#define __SINCOSD_SC3 -0.00019841269827816117 +#define __SINCOSD_SC4 0.00833333333331908278 +#define __SINCOSD_SC5 -0.16666666666666612594 -#define _SINCOS_KC1D (13176794.0 / 8388608.0) -#define _SINCOS_KC2D 7.5497899548918821691639751442098584e-8 +#define __SINCOSD_KC1 (13176794.0 / 8388608.0) +#define __SINCOSD_KC2 7.5497899548918821691639751442098584e-8 // // Common constants used to evaluate sinf4/cosf4/tanf4 // -#define _SINCOS_CC0 -0.0013602249f -#define _SINCOS_CC1 0.0416566950f -#define _SINCOS_CC2 -0.4999990225f -#define _SINCOS_SC0 -0.0001950727f -#define _SINCOS_SC1 0.0083320758f -#define _SINCOS_SC2 -0.1666665247f +#define __SINCOSF_CC0 -0.0013602249f +#define __SINCOSF_CC1 0.0416566950f +#define __SINCOSF_CC2 -0.4999990225f +#define __SINCOSF_SC0 -0.0001950727f +#define __SINCOSF_SC1 0.0083320758f +#define __SINCOSF_SC2 -0.1666665247f -#define _SINCOS_KC1 1.57079625129f -#define _SINCOS_KC2 7.54978995489e-8f +#define __SINCOSF_KC1 1.57079625129f +#define __SINCOSF_KC2 7.54978995489e-8f // // Common constants used to evaluate sinf4est/cosf4est // -#define _SINCOS_R1 -0.1666665668f -#define _SINCOS_R2 0.8333025139e-2f -#define _SINCOS_R3 -0.1980741872e-3f -#define _SINCOS_R4 0.2601903036e-5f +#define __SINCOSF_R1 -0.1666665668f +#define __SINCOSF_R2 0.8333025139e-2f +#define __SINCOSF_R3 -0.1980741872e-3f +#define __SINCOSF_R4 0.2601903036e-5f -#define _SINCOS_C1 (201.0f/64.0f) -#define _SINCOS_C2 9.67653589793e-4f +#define __SINCOSF_C1 (201.0f/64.0f) +#define __SINCOSF_C2 9.67653589793e-4f #endif diff --git a/Extras/simdmathlibrary/common/sincosd2.c b/Extras/simdmathlibrary/common/sincosd2.c new file mode 100644 index 000000000..f5dce4d7e --- /dev/null +++ b/Extras/simdmathlibrary/common/sincosd2.c @@ -0,0 +1,36 @@ +/* sincosd2 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
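The renamed _sincos.h above carries the shared range-reduction constants: __SINCOSF_KC1 is a truncated pi/2 that multiplies exactly against small quadrant counts in single precision (likewise __SINCOSD_KC1 = 13176794.0/8388608.0 for doubles), and __SINCOSF_KC2 holds the bits that were cut off. Subtracting q*KC1 and then q*KC2, Cody-Waite style, loses far less precision than subtracting q*(pi/2) in one step. A scalar sketch of that reduction, with the rounding of q simplified relative to the vector code shown later in this patch:

#include <math.h>

#define KC1 1.57079625129f     /* __SINCOSF_KC1: coarse pi/2, short mantissa */
#define KC2 7.54978995489e-8f  /* __SINCOSF_KC2: the remaining pi/2 bits */

/* Reduce x to r in roughly [-pi/4, pi/4] plus a quadrant index. */
static float reduce_quadrant(float x, int *quadrant)
{
    float q = rintf(x * 0.63661977236f); /* x * 2/pi, rounded to an integer */
    *quadrant = (int)q;
    float r = x - q * KC1;               /* nearly exact: q*KC1 fits a float */
    return r - q * KC2;                  /* fold in the correction term */
}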
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/sincosd2.h> + +void +sincosd2 (vector double x, vector double *s, vector double *c) +{ + _sincosd2( x, s, c ); +} diff --git a/Extras/simdmathlibrary/common/sincosf4.c b/Extras/simdmathlibrary/common/sincosf4.c new file mode 100644 index 000000000..dfa2333d6 --- /dev/null +++ b/Extras/simdmathlibrary/common/sincosf4.c @@ -0,0 +1,36 @@ +/* sincosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.
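sincosd2 above (and sincosf4 just below) return sine and cosine together because both fall out of the same range reduction; callers needing both should prefer them over separate sin/cos calls. A hypothetical caller, compiled as SPU code or with AltiVec enabled (the rotate function is illustrative, not part of the library):

#include <simdmath.h>

/* Rotate the points (x, y) by the angles t; one shared range reduction
   produces both the sine and cosine lanes. */
static void rotate(vector float t, vector float x, vector float y,
                   vector float *xr, vector float *yr)
{
    vector float s, c, zero = {0.0f, 0.0f, 0.0f, 0.0f};
    sincosf4(t, &s, &c);
    *xr = fmaf4(x, c, negatef4(fmaf4(y, s, zero)));  /* x*cos - y*sin */
    *yr = fmaf4(x, s, fmaf4(y, c, zero));            /* x*sin + y*cos */
}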
+ */ + +#include <simdmath/sincosf4.h> + +void +sincosf4 (vector float x, vector float *s, vector float *c) +{ + _sincosf4( x, s, c ); +} diff --git a/Extras/simdmathlibrary/common/sind2.c b/Extras/simdmathlibrary/common/sind2.c new file mode 100644 index 000000000..25e8ca0e5 --- /dev/null +++ b/Extras/simdmathlibrary/common/sind2.c @@ -0,0 +1,36 @@ +/* sind2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/sind2.h> + +vector double +sind2 (vector double x) +{ + return _sind2(x); +} diff --git a/Extras/simdmathlibrary/common/sinf4.c b/Extras/simdmathlibrary/common/sinf4.c new file mode 100644 index 000000000..7e5ffd02e --- /dev/null +++ b/Extras/simdmathlibrary/common/sinf4.c @@ -0,0 +1,36 @@ +/* sinf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
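Behind wrappers like the sind2/sinf4 files in this hunk sit short polynomial kernels over the reduced range; the single-precision coefficients are exactly the __SINCOSF_* values from _sincos.h above. Scalar models of the two kernels, valid only for x in roughly [-pi/4, pi/4]:

/* sin(x) ~ x + x^3*((SC0*x^2 + SC1)*x^2 + SC2), coefficients from _sincos.h */
static float sin_kernel(float x)
{
    float x2 = x * x;
    return x + x2 * x * ((-0.0001950727f * x2 + 0.0083320758f) * x2
                         - 0.1666665247f);
}

/* cos(x) ~ 1 + x^2*((CC0*x^2 + CC1)*x^2 + CC2) */
static float cos_kernel(float x)
{
    float x2 = x * x;
    return 1.0f + x2 * ((-0.0013602249f * x2 + 0.0416566950f) * x2
                        - 0.4999990225f);
}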
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/sinf4.h> + +vector float +sinf4 (vector float x) +{ + return _sinf4( x ); +} diff --git a/Extras/simdmathlibrary/common/sqrtd2.c b/Extras/simdmathlibrary/common/sqrtd2.c new file mode 100644 index 000000000..b73d95360 --- /dev/null +++ b/Extras/simdmathlibrary/common/sqrtd2.c @@ -0,0 +1,36 @@ +/* sqrtd2 - for each of two double slots, compute square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/sqrtd2.h> + +vector double +sqrtd2 (vector double x) +{ + return _sqrtd2(x); +} diff --git a/Extras/simdmathlibrary/common/sqrtf4.c b/Extras/simdmathlibrary/common/sqrtf4.c new file mode 100644 index 000000000..557c80fdf --- /dev/null +++ b/Extras/simdmathlibrary/common/sqrtf4.c @@ -0,0 +1,36 @@ +/* sqrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/sqrtf4.h> + +vector float +sqrtf4 (vector float x) +{ + return _sqrtf4( x ); +} diff --git a/Extras/simdmathlibrary/common/tand2.c b/Extras/simdmathlibrary/common/tand2.c new file mode 100644 index 000000000..083a35d2a --- /dev/null +++ b/Extras/simdmathlibrary/common/tand2.c @@ -0,0 +1,36 @@ +/* tand2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/tand2.h> + +vector double +tand2 (vector double x) +{ + return _tand2(x); +} diff --git a/Extras/simdmathlibrary/common/tanf4.c b/Extras/simdmathlibrary/common/tanf4.c new file mode 100644 index 000000000..1857e048d --- /dev/null +++ b/Extras/simdmathlibrary/common/tanf4.c @@ -0,0 +1,36 @@ +/* tanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#include <simdmath/tanf4.h> + +vector float +tanf4 (vector float x) +{ + return _tanf4( x ); +} diff --git a/Extras/simdmathlibrary/common/truncd2.c b/Extras/simdmathlibrary/common/truncd2.c new file mode 100644 index 000000000..7a01c480f --- /dev/null +++ b/Extras/simdmathlibrary/common/truncd2.c @@ -0,0 +1,37 @@ +/* truncd2 - Round the input to the nearest integer. + Always rounds towards 0. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.
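truncd2, whose one-line wrapper follows this license block, rounds toward zero. A scalar model of the contract; the vector implementation works on exponent bits instead, but must honor the same large-magnitude pass-through (NaN handling omitted in this sketch):

/* Round toward zero. Doubles with |x| >= 2^52 carry no fractional bits,
   so they pass through unchanged; smaller values fit a 64-bit integer. */
static double trunc_lane(double x)
{
    if (x >= 4503599627370496.0 || x <= -4503599627370496.0)
        return x;                    /* already integral */
    return (double)(long long)x;     /* C int conversion truncates toward 0 */
}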
+ */ + +#include <simdmath/truncd2.h> + +vector double +truncd2(vector double in) +{ + return _truncd2(in); +} diff --git a/Extras/simdmathlibrary/ppu/truncf4.c b/Extras/simdmathlibrary/common/truncf4.c similarity index 96% rename from Extras/simdmathlibrary/ppu/truncf4.c rename to Extras/simdmathlibrary/common/truncf4.c index 664bd7074..2e73376a6 100644 --- a/Extras/simdmathlibrary/ppu/truncf4.c +++ b/Extras/simdmathlibrary/common/truncf4.c @@ -27,13 +27,11 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include <simdmath.h> -#include <altivec.h> - +#include <simdmath/truncf4.h> vector float truncf4 (vector float x) { - return vec_trunc( x ); + return _truncf4( x ); } diff --git a/Extras/simdmathlibrary/ppu/Makefile b/Extras/simdmathlibrary/ppu/Makefile index ad1f677c4..901c898c5 100644 --- a/Extras/simdmathlibrary/ppu/Makefile +++ b/Extras/simdmathlibrary/ppu/Makefile @@ -31,6 +31,7 @@ # All that you do to add a file is edit OBJS, the rest will just work prefix = /usr +prefix_ppu = $(prefix) DESTDIR = OBJS = fabsf4.o absi4.o truncf4.o sqrtf4.o tanf4.o \ @@ -43,10 +44,10 @@ OBJS = fabsf4.o absi4.o truncf4.o sqrtf4.o tanf4.o \ fmodf4.o negatei4.o exp2f4.o powf4.o atanf4.o \ atan2f4.o acosf4.o -INCLUDES_PPU = -I../ +INCLUDES_PPU = -I. -I../common -ARCH_PPU = 64 -CROSS_PPU = ppu- +ARCH_PPU = 32 +CROSS_PPU = AR_PPU = $(CROSS_PPU)ar CC_PPU = $(CROSS_PPU)gcc CXX_PPU = $(CROSS_PPU)g++ @@ -61,6 +62,7 @@ INSTALL = install MAKE_DEFS = \ prefix='$(prefix)' \ + prefix_ppu='$(prefix_ppu)' \ DESTDIR='$(DESTDIR)' \ LIB_BASE='$(LIB_BASE)' \ LIB_NAME='$(LIB_NAME)' \ @@ -78,13 +80,15 @@ MAKE_DEFS = \ LIB_MAJOR_VERSION = 1 LIB_MINOR_VERSION = 0 +LIB_RELEASE = 1 +LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE) LIB_BASE = simdmath LIB_NAME = lib$(LIB_BASE) STATIC_LIB = $(LIB_NAME).a SHARED_LIB = $(LIB_NAME).so SHARED_LIB_SONAME = $(SHARED_LIB).$(LIB_MAJOR_VERSION) -SHARED_LIB_FULL = $(SHARED_LIB).$(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION) +SHARED_LIB_FULL = $(SHARED_LIB).$(LIB_FULL_VERSION) ALL_LIBS = $(STATIC_LIB) $(SHARED_LIB) $(SHARED_LIB_FULL) $(SHARED_LIB_SONAME) @@ -98,50 +102,36 @@ $(STATIC_LIB): $(OBJS) $(AR_PPU) cr $@ $(OBJS) $(RANLIB_PPU) $@ -$(SHARED_LIB): $(OBJS) +$(SHARED_LIB_FULL): $(OBJS) $(CC_PPU) -shared $(OBJS) -o $@ $(LDFLAGS_PPU) -Wl,-h,$(SHARED_LIB_SONAME) -$(SHARED_LIB_SONAME) $(SHARED_LIB_FULL): $(SHARED_LIB) - ln -fs $(SHARED_LIB) $@ +$(SHARED_LIB) $(SHARED_LIB_SONAME): $(SHARED_LIB_FULL) + ln -fs $(SHARED_LIB_FULL) $@ install: $(ALL_LIBS) - $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include - $(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/include/ - $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/lib - $(INSTALL) -m 644 $(STATIC_LIB) $(DESTDIR)$(prefix)/lib/$(STATIC_LIB) - $(INSTALL) -m 755 $(SHARED_LIB) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_FULL) - ln -fs $(SHARED_LIB_FULL) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_SONAME) - ln -fs $(SHARED_LIB_SONAME) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB) + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/include + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/include/simdmath + $(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_ppu)/include/simdmath/ + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/lib + $(INSTALL) -m 644 $(STATIC_LIB) $(DESTDIR)$(prefix_ppu)/lib/$(STATIC_LIB) + $(INSTALL) -m 755 $(SHARED_LIB_FULL) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB_FULL) + ln -fs $(SHARED_LIB_FULL) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB_SONAME) + ln -fs $(SHARED_LIB_SONAME) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB) clean: cd tests; $(MAKE) $(MAKE_DEFS) clean rm -f $(OBJS) rm -f $(STATIC_LIB) $(SHARED_LIB)
$(SHARED_LIB_SONAME) $(SHARED_LIB_FULL) -$(OBJS): ../simdmath.h common-types.h +$(OBJS): ../common/simdmath.h simdmath/_vec_utils.h check: $(ALL_LIBS) cd tests; $(MAKE) $(MAKE_DEFS) all; $(MAKE) $(MAKE_DEFS) check # Some Objects have special header files. -sinf4.o cosf4.o sincosf4.o tanf4.o: sincos_c.h +sinf4.o cosf4.o sincosf4.o tanf4.o: ../common/simdmath/_sincos.h -%.o: %.c +%.o: ../common/%.c simdmath/%.h $(CC_PPU) $(CFLAGS_PPU) -c $< - -#---------- -# C++ -#---------- -%.o: %.C - $(CXX_PPU) $(CFLAGS_PPU) -c $< - -%.o: %.cpp - $(CXX_PPU) $(CFLAGS_PPU) -c $< - -%.o: %.cc - $(CXX_PPU) $(CFLAGS_PPU) -c $< - -%.o: %.cxx - $(CXX_PPU) $(CFLAGS_PPU) -c $< diff --git a/Extras/simdmathlibrary/ppu/atanf4.c b/Extras/simdmathlibrary/ppu/atanf4.c deleted file mode 100644 index 5fcea7f11..000000000 --- a/Extras/simdmathlibrary/ppu/atanf4.c +++ /dev/null @@ -1,83 +0,0 @@ -/* atanf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "common-types.h" - -// -// Computes the inverse tangent of all four slots of x. -// -vector float -atanf4 (vector float x) -{ - vec_float4 bias; - vec_float4 x2, x3, x4, x8, x9; - vec_float4 hi, lo; - vec_float4 result; - vec_float4 inv_x; - vec_uint4 sign; - vec_uint4 select; - vec_float4 xabs; - vec_float4 vzero = (vec_float4){0.0, 0.0, 0.0, 0.0}; - - sign = vec_and((vec_uint4)x, vec_splatsu4(0x80000000)); - xabs = (vec_float4)vec_andc((vec_uint4)x, vec_splatsu4(0x80000000)); - inv_x = recipf4(x); - inv_x = (vec_float4)vec_xor((vec_uint4)inv_x, vec_splatsu4(0x80000000)); - select = (vec_uint4)vec_cmpgt(xabs, ((vec_float4){1.0, 1.0, 1.0, 1.0}) ); - bias = (vec_float4)vec_or(sign, (vec_uint4)(vec_splatsf4(1.57079632679489661923f))); - bias = (vec_float4)vec_and((vec_uint4)bias, select); - - x = vec_sel(x, inv_x, select); - - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves(hi and lo). 
- */ - bias = vec_add(bias, x); - x2 = vec_madd(x, x, vzero); - x3 = vec_madd(x2, x, vzero); - x4 = vec_madd(x2, x2, vzero); - x8 = vec_madd(x4, x4, vzero); - x9 = vec_madd(x8, x, vzero); - hi = vec_madd(vec_splatsf4(0.0028662257), x2, vec_splatsf4(-0.0161657367)); - hi = vec_madd(hi, x2, vec_splatsf4(0.0429096138)); - hi = vec_madd(hi, x2, vec_splatsf4(-0.0752896400)); - hi = vec_madd(hi, x2, vec_splatsf4(0.1065626393)); - lo = vec_madd(vec_splatsf4(-0.1420889944), x2, vec_splatsf4(0.1999355085)); - lo = vec_madd(lo, x2, vec_splatsf4(-0.3333314528)); - lo = vec_madd(lo, x3, bias); - - result = vec_madd(hi, x9, lo); - return result; -} - diff --git a/Extras/simdmathlibrary/ppu/cbrtf4.c b/Extras/simdmathlibrary/ppu/cbrtf4.c deleted file mode 100644 index 4aa7d6312..000000000 --- a/Extras/simdmathlibrary/ppu/cbrtf4.c +++ /dev/null @@ -1,103 +0,0 @@ -/* cbrtf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
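The deleted PPU atanf4 above is still instructive: for |x| > 1 it switches to -1/x and a bias of +-pi/2 (the identity atan(x) = sign(x)*pi/2 + atan(-1/x)), then evaluates the odd polynomial as two independent fma chains so the halves can issue in parallel rather than as one long dependent chain. A scalar transcription, coefficients copied from the deleted file:

/* bias is +-pi/2 (for the |x| > 1 branch) or 0, as set up by the caller. */
static float atan_poly(float x, float bias)
{
    float x2 = x * x;
    float x3 = x2 * x;
    float x9 = x3 * x3 * x3;
    float hi = (((0.0028662257f * x2 - 0.0161657367f) * x2
                 + 0.0429096138f) * x2 - 0.0752896400f) * x2
               + 0.1065626393f;
    float lo = ((-0.1420889944f * x2 + 0.1999355085f) * x2
                - 0.3333314528f) * x3 + (bias + x);
    return hi * x9 + lo;  /* recombine the two independently computed halves */
}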
- */ - -#include -#include - -#include "common-types.h" - -#define __calcQuot(xexp) n = xexp; \ - vec_uint4 negxexpmask = (vec_uint4)vec_cmpgt( ((vec_int4){0, 0, 0, 0}) , n); \ - n = vec_sel(n, vec_add(n, ((vec_int4){2, 2, 2, 2}) ), negxexpmask); \ - \ - quot = vec_add(vec_sra(n, ((vec_uint4){2, 2, 2, 2}) ), vec_sra(n, ((vec_uint4){4, 4, 4, 4}) )); \ - quot = vec_add(quot, vec_sra(quot, ((vec_uint4){4, 4, 4, 4}) )); \ - quot = vec_add(quot, vec_sra(quot, ((vec_uint4){8, 8, 8, 8}) )); \ - quot = vec_add(quot, vec_sra(quot, ((vec_uint4){16, 16, 16, 16}) )); \ - vec_int4 r = vec_sub(vec_sub(n,quot), vec_sl(quot, ((vec_uint4){1, 1, 1, 1}) )); \ - quot = vec_add( \ - quot, \ - vec_sra( \ - vec_add( \ - vec_add(r,((vec_int4){5, 5, 5, 5})), \ - vec_sl (r,((vec_uint4){2, 2, 2, 2})) \ - ), \ - ((vec_uint4){4, 4, 4, 4}) \ - ) \ - ); \ - -#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3) -#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3) - -vector float -cbrtf4 (vector float x) -{ - vec_float4 zeros = (vec_float4){0.0f, 0.0f, 0.0f, 0.0f}; - vec_int4 xexp, n; - vec_float4 sgnmask = (vec_float4)(vec_splatsi4(0x80000000)); - vec_uint4 negmask = (vec_uint4)vec_cmpgt(zeros, x); - x = vec_andc(x, sgnmask); - - x = frexpf4(x, &xexp); - vec_float4 p = vec_madd( - vec_madd(x, vec_splatsf4(-0.191502161678719066f), vec_splatsf4(0.697570460207922770f)), - x, - vec_splatsf4(0.492659620528969547f) - ); - vec_float4 p3 = vec_madd(p, vec_madd(p, p, zeros), zeros); - - vec_int4 quot; - __calcQuot(xexp); - vec_int4 modval = vec_sub(vec_sub(xexp,quot), vec_sl(quot,vec_splatsu4(1))); // mod = xexp - 3*quotient - vec_float4 factor = vec_splatsf4(1.0/_CBRTF_H_sqr_cbrt2); - factor = vec_sel(factor, vec_splatsf4(1.0/_CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4(-1))); - factor = vec_sel(factor, vec_splatsf4( 1.0), vec_cmpeq(modval,vec_splatsi4( 0))); - factor = vec_sel(factor, vec_splatsf4( _CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4( 1))); - factor = vec_sel(factor, vec_splatsf4(_CBRTF_H_sqr_cbrt2), vec_cmpeq(modval,vec_splatsi4( 2))); - - vec_float4 pre = vec_madd(p, factor, zeros); - vec_float4 numr = vec_madd(x , vec_splatsf4(2.0f), p3); - vec_float4 denr = vec_madd(p3, vec_splatsf4(2.0f), x ); - vec_float4 res = vec_madd(pre, divf4(numr, denr), zeros); - res = ldexpf4(res, quot); - - return vec_sel(res, vec_or(res,sgnmask), negmask); -} - -/* -_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x)) -{ - vec_uint4 neg = (vec_uint4)vec_cmpgt((vec_float4)(0.0f), x); - vec_float4 sbit = (vec_float4)(vec_float4)((int)0x80000000); - vec_float4 absx = vec_andc(x, sbit); - vec_float4 res = exp2f4(vec_mul((vec_float4)(0.3333333333333f), log2f4(absx))); - res = vec_sel(res, vec_or(sbit, res), neg); - return res; -} -*/ diff --git a/Extras/simdmathlibrary/ppu/cosf4.c b/Extras/simdmathlibrary/ppu/cosf4.c deleted file mode 100644 index 8b395fba2..000000000 --- a/Extras/simdmathlibrary/ppu/cosf4.c +++ /dev/null @@ -1,104 +0,0 @@ -/* cosf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
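The deleted cbrtf4 above splits x into mantissa and exponent with frexpf4, estimates the mantissa's cube root with a small polynomial plus one refinement step, and handles the exponent as e = 3q + r: the __calcQuot macro computes q ~ e/3 purely with shifts and adds (no integer divide instruction on these targets), and r selects a 2^(r/3) scale factor (the _CBRTF_H_cbrt2 constants). A scalar model of the exponent handling, using a plain division where the macro used shift tricks:

#include <math.h>

/* Scale the mantissa's cube root back up: cbrt(m * 2^e) =
   cbrt(m) * 2^(r/3) * 2^q, where e = 3q + r. */
static float cbrt_scale(float mant_cbrt, int e)
{
    int q = e / 3, r = e - 3 * q;        /* C division gives r in [-2, 2] */
    static const float pow2_third[5] = { /* 2^(r/3) for r = -2 .. 2 */
        0.62996052f, 0.79370053f, 1.0f, 1.25992105f, 1.58740105f };
    return ldexpf(mant_cbrt * pow2_third[r + 2], q);
}

The deleted code normalized r into {-1, 0, 1, 2} instead of [-2, 2], which changes the lookup but not the idea.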
- * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "sincos_c.h" -#include "common-types.h" - - -// -// Computes the cosine of each of the four slots -// by using a polynomial approximation. -// -vector float -cosf4 (vector float x) -{ - vec_float4 xl,xl2,xl3,res; - vec_int4 q; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); - q = vec_cts(xl,0); - - - // Compute an offset based on the quadrant that the angle falls in - // - vec_int4 offset = vec_add(vec_splatsi4(1),vec_and(q,vec_splatsi4((int)0x3))); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = vec_ctf(q,0); - vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); - xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); - - // Compute x^2 and x^3 - // - xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); - xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); - - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); - vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); - - vec_float4 ct2 = vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); - vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); - - vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); - vec_float4 sx = vec_madd(st2,xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_uint4 mask1 = (vec_uint4)vec_cmpeq(vec_and(offset, - ((vec_int4){0x1, 0x1, 0x1, 0x1})), - ((vec_int4){0, 0, 0, 0})); - res = vec_sel(cx,sx,mask1); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - vec_uint4 mask2 = (vec_uint4)vec_cmpeq(vec_and(offset,vec_splatsi4(0x2)),vec_splatsi4((int)0)); - res = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000U),(vec_uint4)res),res,mask2); - - return res; - -} - diff --git a/Extras/simdmathlibrary/ppu/log10f4.c b/Extras/simdmathlibrary/ppu/log10f4.c deleted file mode 100644 index 32691b523..000000000 --- a/Extras/simdmathlibrary/ppu/log10f4.c +++ /dev/null @@ -1,82 +0,0 @@ -/* log10f4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. 
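The quadrant logic in the deleted cosf4 above is the part worth keeping in mind: after reduction to r in [-pi/4, pi/4] with quadrant q, cos(x) equals one of +-sin(r) or +-cos(r), and the code folds the cosine's pi/2 phase shift in by adding 1 to the quadrant index before selecting (cos(x) = sin(x + pi/2)). A scalar model, taking the two kernel results as inputs:

/* sr = sin(r), cr = cos(r) for the reduced argument; q = quadrant index. */
static float cos_from_quadrant(float sr, float cr, int q)
{
    int offset = (q + 1) & 3;         /* the +1 turns sin selection into cos */
    float v = (offset & 1) ? cr : sr; /* odd offset -> cosine polynomial */
    return (offset & 2) ? -v : v;     /* two of the four quadrants negate */
}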
- - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - - -#include -#include - -#include "common-types.h" - -#define _LOG10F_H_loga2msb ((float) 0.3010299205780f) -#define _LOG10F_H_loga2lsb ((float) 7.5085978266e-8f) -#define _LOG10F_H_logaemsb ((float) 0.4342944622040f) -#define _LOG10F_H_logaelsb ((float) 1.9699272335e-8f) -#define _LOG10F_H_neglogae ((float)-0.4342944819033f) - -#define _LOG10F_H_c0 ((float)(-0.2988439998f)) -#define _LOG10F_H_c1 ((float)(-0.3997655209f)) -#define _LOG10F_H_c2 ((float)(-0.6666679125f)) - -vector float -log10f4 (vector float x) -{ - vec_int4 zeros = vec_splatsi4(0); - vec_float4 ones = vec_splatsf4(1.0f); - //vec_uchar16 zeromask = (vec_uchar16)vec_cmpeq(x, (vec_float4)zeros); - - vec_int4 expmask = vec_splatsi4(0x7F800000); - vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); - x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); - - vec_uint4 mask = (vec_uint4)vec_cmpgt( vec_splatsf4((float)0.7071067811865f), x); - x = vec_sel(x , vec_add(x, x) , mask); - xexp = vec_sel(xexp, vec_sub(xexp, vec_splatsi4(1)), mask); - - vec_float4 x1 = vec_sub(x , ones); - vec_float4 z = divf4 (x1, vec_add(x, ones)); - vec_float4 w = vec_madd(z , z, (vec_float4)zeros); - vec_float4 polywneg; - polywneg = vec_madd(vec_splatsf4(_LOG10F_H_c0), w, vec_splatsf4(_LOG10F_H_c1)); - polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOG10F_H_c2)); - - vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); - vec_float4 wnew = vec_ctf(xexp,0); - - vec_float4 zz1 = vec_madd(vec_splatsf4(_LOG10F_H_logaemsb), x1, - vec_madd(vec_splatsf4(_LOG10F_H_loga2msb),wnew,(vec_float4)zeros)); - vec_float4 zz2 = vec_madd(vec_splatsf4(_LOG10F_H_logaelsb), x1, - vec_madd(vec_splatsf4(_LOG10F_H_loga2lsb), wnew, - vec_madd(vec_splatsf4(_LOG10F_H_neglogae),y,(vec_float4)zeros)) - ); - - //return vec_sel(vec_add(zz1,zz2), (vec_float4)zeromask, zeromask); - return vec_add(zz1, zz2); -} - - diff --git a/Extras/simdmathlibrary/ppu/powf4.c b/Extras/simdmathlibrary/ppu/powf4.c deleted file mode 100644 index 
f290219cf..000000000 --- a/Extras/simdmathlibrary/ppu/powf4.c +++ /dev/null @@ -1,74 +0,0 @@ -/* exp2f4 - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include "common-types.h" - -vector float -powf4 (vector float x, vector float y) -{ - vec_int4 zeros = (vec_int4){0,0,0,0}; - vec_uint4 zeromask = (vec_uint4)vec_cmpeq((vec_float4)zeros, x); - - vec_uint4 negmask = (vec_uint4)vec_cmpgt((vec_float4)zeros, x); - - vec_float4 sbit = (vec_float4)(vec_splatsi4(0x80000000)); - vec_float4 absx = vec_andc(x, sbit); - vec_float4 absy = vec_andc(y, sbit); - vec_uint4 oddy = vec_and(vec_ctu(absy, 0), vec_splatsu4(0x00000001)); - negmask = vec_and(negmask, (vec_uint4)vec_cmpgt(oddy, (vec_uint4)zeros)); - - vec_float4 res = exp2f4(vec_madd(y, log2f4(absx), (vec_float4)zeros)); - res = vec_sel(res, vec_or(sbit, res), negmask); - - - return vec_sel(res, (vec_float4)zeros, zeromask); -} - -/* -{ - vec_int4 zeros = vec_splats(0); - vec_int4 ones = (vec_int4)vec_splats((char)0xFF); - vec_uint4 zeromask = (vec_uint4)vec_cmpeq((vec_float4)zeros, x); - vec_uint4 onemask = (vec_uint4)vec_cmpeq((vec_float4)ones , y); - vec_uint4 negmask = (vec_uint4)vec_cmpgt(vec_splats(0.0f), x); - vec_float4 sbit = (vec_float4)(vec_int4)(0x80000000); - vec_float4 absx = vec_andc(x, sbit); - vec_float4 absy = vec_andc(y, sbit); - vec_uint4 oddy = vec_and(vec_convtu(absy, 0), (vec_uint4)vec_splats(0x00000001)); - negmask = vec_and(negmask, (vec_uint4)vec_cmpgt(oddy, (vec_uint4)zeros)); - - - -} - -*/ diff --git a/Extras/simdmathlibrary/ppu/common-types.h b/Extras/simdmathlibrary/ppu/simdmath/_vec_utils.h similarity index 79% rename from Extras/simdmathlibrary/ppu/common-types.h rename to Extras/simdmathlibrary/ppu/simdmath/_vec_utils.h index 83fc82041..b5bf40aa3 100644 --- a/Extras/simdmathlibrary/ppu/common-types.h +++ b/Extras/simdmathlibrary/ppu/simdmath/_vec_utils.h @@ -27,26 +27,25 @@ POSSIBILITY OF SUCH DAMAGE. 
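The deleted powf4.c above is built from the other kernels: x^y = exp2(y * log2|x|), with two fix-ups. A zero base returns zero (zeromask), and a negative base flips the result's sign only when |y| truncates to an odd integer (the vec_ctu/oddy test). A scalar restatement, with C99 exp2f/log2f standing in for exp2f4/log2f4 (editor's sketch):

#include <math.h>

static float powf4_model(float x, float y)
{
    if (x == 0.0f) return 0.0f;        /* the diff's zeromask forces 0 */
    float res = exp2f(y * log2f(fabsf(x)));
    /* negative base: negate only when |y| truncates to an odd integer
       (vec_ctu saturates; this plain cast assumes |y| fits an unsigned) */
    if (x < 0.0f && ((unsigned)fabsf(y) & 1u))
        res = -res;
    return res;
}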
*/ -#ifndef ___COMMON_TYPES_H___ -#define ___COMMON_TYPES_H___ +#ifndef ___SIMD_MATH_VEC_UTILS_H___ +#define ___SIMD_MATH_VEC_UTILS_H___ -typedef vector float vec_float4; -typedef vector signed int vec_int4; -typedef vector unsigned int vec_uint4; - -static inline vec_float4 vec_splatsf4(const float x) +static inline vector float +__vec_splatsf4(const float x) { - return (vec_float4) {x, x, x, x}; + return (vector float) {x, x, x, x}; } -static inline vec_int4 vec_splatsi4(const signed int x) +static inline vector signed int +__vec_splatsi4(const signed int x) { - return (vec_int4) {x, x, x, x}; + return (vector signed int) {x, x, x, x}; } -static inline vec_uint4 vec_splatsu4(const unsigned int x) +static inline vector unsigned int +__vec_splatsu4(const unsigned int x) { - return (vec_uint4) {x, x, x, x}; + return (vector unsigned int) {x, x, x, x}; } #endif diff --git a/Extras/simdmathlibrary/spu/absi4.c b/Extras/simdmathlibrary/ppu/simdmath/absi4.h similarity index 90% rename from Extras/simdmathlibrary/spu/absi4.c rename to Extras/simdmathlibrary/ppu/simdmath/absi4.h index fd6ee0d63..65cdc959c 100644 --- a/Extras/simdmathlibrary/spu/absi4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/absi4.h @@ -27,14 +27,16 @@ POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +#ifndef ___SIMD_MATH_ABSI4_H___ +#define ___SIMD_MATH_ABSI4_H___ -vector signed int -absi4 (vector signed int x) +#include +#include + +static inline vector signed int +_absi4 (vector signed int x) { - vec_int4 neg; - neg = spu_sub( 0, x ); - return spu_sel( neg, x, spu_cmpgt( x, -1 ) ); + return vec_abs( x ); } +#endif diff --git a/Extras/simdmathlibrary/ppu/acosf4.c b/Extras/simdmathlibrary/ppu/simdmath/acosf4.h similarity index 53% rename from Extras/simdmathlibrary/ppu/acosf4.c rename to Extras/simdmathlibrary/ppu/simdmath/acosf4.h index 88255e50e..e84e803e8 100644 --- a/Extras/simdmathlibrary/ppu/acosf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/acosf4.h @@ -27,53 +27,56 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ACOSF4_H___ +#define ___SIMD_MATH_ACOSF4_H___ + #include #include -#include "common-types.h" +#include // // Computes the inverse cosine of all four slots of x. // -vector float -acosf4 (vector float x) +static inline vector float +_acosf4 (vector float x) { - vec_float4 result, xabs; - vec_float4 t1; - vec_float4 xabs2, xabs4; - vec_float4 hi, lo; - vec_float4 neg, pos; - vec_uint4 select; + vector float result, xabs; + vector float t1; + vector float xabs2, xabs4; + vector float hi, lo; + vector float neg, pos; + vector unsigned int select; - xabs = vec_abs(x); - select = (vec_uint4)(vec_sra((vec_int4)(x), ((vec_uint4){31, 31, 31, 31}) )); + xabs = vec_abs(x); + select = (vector unsigned int)(vec_sra((vector signed int)(x), __vec_splatsu4(31))); - t1 = sqrtf4(vec_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs)); + t1 = _sqrtf4(vec_sub(__vec_splatsf4(1.0f), xabs)); - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves (hi amd lo). 
- */ - xabs2 = vec_madd(xabs, xabs, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); - xabs4 = vec_madd(xabs2, xabs2, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); - hi = vec_madd( ((vec_float4){-0.0012624911, -0.0012624911, -0.0012624911, -0.0012624911}) , xabs, ((vec_float4){0.0066700901, 0.0066700901, 0.0066700901, 0.0066700901}) ); - hi = vec_madd(hi, xabs, vec_splatsf4(-0.0170881256)); - hi = vec_madd(hi, xabs, vec_splatsf4( 0.0308918810)); - lo = vec_madd(vec_splatsf4(-0.0501743046), xabs, vec_splatsf4(0.0889789874)); - lo = vec_madd(lo, xabs, vec_splatsf4(-0.2145988016)); - lo = vec_madd(lo, xabs, vec_splatsf4( 1.5707963050)); + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polynomial is evaluated + * in two halves (hi and lo). + */ + xabs2 = vec_madd(xabs, xabs, __vec_splatsf4(0.0f) ); + xabs4 = vec_madd(xabs2, xabs2, __vec_splatsf4(0.0f) ); + hi = vec_madd(__vec_splatsf4(-0.0012624911) , xabs, __vec_splatsf4(0.0066700901)); + hi = vec_madd(hi, xabs, __vec_splatsf4(-0.0170881256)); + hi = vec_madd(hi, xabs, __vec_splatsf4( 0.0308918810)); + lo = vec_madd(__vec_splatsf4(-0.0501743046), xabs, __vec_splatsf4(0.0889789874)); + lo = vec_madd(lo, xabs, __vec_splatsf4(-0.2145988016)); + lo = vec_madd(lo, xabs, __vec_splatsf4( 1.5707963050)); - result = vec_madd(hi, xabs4, lo); + result = vec_madd(hi, xabs4, lo); - /* Adjust the result if x is negactive. - */ - neg = vec_nmsub(t1, result, vec_splatsf4(3.1415926535898f)); - pos = vec_madd(t1, result, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); + /* Adjust the result if x is negative. + */ + neg = vec_nmsub(t1, result, __vec_splatsf4(3.1415926535898f)); + pos = vec_madd(t1, result, __vec_splatsf4(0.0f)); - result = vec_sel(pos, neg, select); + result = vec_sel(pos, neg, select); - return result; + return result; } - +#endif diff --git a/Extras/simdmathlibrary/ppu/asinf4.c b/Extras/simdmathlibrary/ppu/simdmath/asinf4.h similarity index 51% rename from Extras/simdmathlibrary/ppu/asinf4.c rename to Extras/simdmathlibrary/ppu/simdmath/asinf4.h index 3bf25ad9f..51a7d671a 100644 --- a/Extras/simdmathlibrary/ppu/asinf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/asinf4.h @@ -27,60 +27,69 @@ POSSIBILITY OF SUCH DAMAGE.
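The acosf4 kernel above computes acos(|x|) = sqrt(1 - |x|) * P(|x|), where P is a degree-7 polynomial evaluated in two halves, and applies the reflection acos(x) = pi - acos(|x|) for negative inputs. A scalar model with the same coefficients (editor's sketch):

#include <math.h>

static float acosf4_model(float x)
{
    float xa  = fabsf(x);
    float t1  = sqrtf(1.0f - xa);
    float xa2 = xa * xa, xa4 = xa2 * xa2;
    /* degree-7 polynomial, same coefficients as the diff, in two halves */
    float hi = ((-0.0012624911f * xa + 0.0066700901f) * xa - 0.0170881256f) * xa
               + 0.0308918810f;
    float lo = ((-0.0501743046f * xa + 0.0889789874f) * xa - 0.2145988016f) * xa
               + 1.5707963050f;
    float p  = hi * xa4 + lo;
    /* acos(-a) = pi - acos(a); the diff selects on the sign bit */
    return signbit(x) ? 3.1415926535898f - t1 * p : t1 * p;
}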
*/ +#ifndef ___SIMD_MATH_ASINF4_H___ +#define ___SIMD_MATH_ASINF4_H___ + #include #include -#include "common-types.h" +#include +#include -vector float asinf4 (vector float x) +static inline vector float +_asinf4 (vector float x) { - // positive = (x > 0) - // - vec_uint4 positive = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.0f)); + // positive = (x > 0) + // + vector unsigned int positive = (vector unsigned int)vec_cmpgt(x, __vec_splatsf4(0.0f)); - // x = absf(x) - // - x = vec_abs(x); + // x = absf(x) + // + x = vec_abs(x); - // gtHalf = (|x| > 0.5) - // - vec_uint4 gtHalf = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.5f)); + // gtHalf = (|x| > 0.5) + // + vector unsigned int gtHalf = (vector unsigned int)vec_cmpgt(x, __vec_splatsf4(0.5f)); - // if (x > 0.5) - // g = 0.5 - 0.5*x - // x = -2 * sqrtf(g) - // else - // g = x * x - // - vec_float4 g = vec_sel(vec_madd(x,x,vec_splatsf4(0.0f)),vec_madd(vec_splatsf4(-0.5f),x,vec_splatsf4(0.5f)),gtHalf); - - x = vec_sel(x,vec_madd(vec_splatsf4(-2.0f),sqrtf4(g),vec_splatsf4(0.0f)),gtHalf); + // if (x > 0.5) + // g = 0.5 - 0.5*x + // x = -2 * sqrtf(g) + // else + // g = x * x + // + vector float g = + vec_sel(vec_madd(x, x, __vec_splatsf4(0.0f)), + vec_madd(__vec_splatsf4(-0.5f), x, __vec_splatsf4(0.5f)), gtHalf); - // Compute the polynomials and take their ratio - // denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f - // num = x * g * (-0.504400557f * g + 0.933933258f) - // - vec_float4 denom = vec_add(g,vec_splatsf4(-5.54846723f)); - vec_float4 num = vec_madd(vec_splatsf4(-0.504400557f),g,vec_splatsf4(0.933933258f)); - denom = vec_madd(denom,g,vec_splatsf4(5.603603363f)); - num = vec_madd(vec_madd(x,g,vec_splatsf4(0.0f)),num,vec_splatsf4(0.0f)); + x = vec_sel(x, vec_madd(__vec_splatsf4(-2.0f), _sqrtf4(g), __vec_splatsf4(0.0f)), gtHalf); + + // Compute the polynomials and take their ratio + // denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f + // num = x * g * (-0.504400557f * g + 0.933933258f) + // + vector float denom = vec_add(g, __vec_splatsf4(-5.54846723f)); + vector float num = vec_madd(__vec_splatsf4(-0.504400557f), g, __vec_splatsf4(0.933933258f)); + denom = vec_madd(denom, g, __vec_splatsf4(5.603603363f)); + num = vec_madd(vec_madd(x, g, __vec_splatsf4(0.0f)), num, __vec_splatsf4(0.0f)); - // x = x + num / denom - // - x = vec_add(x,divf4(num,denom)); + // x = x + num / denom + // + x = vec_add(x,_divf4(num,denom)); - // if (x > 0.5) - // x = x + M_PI_2 - // - x = vec_sel(x,vec_add(x,vec_splatsf4(1.57079632679489661923f)),gtHalf); + // if (x > 0.5) + // x = x + M_PI_2 + // + x = vec_sel(x,vec_add(x, __vec_splatsf4(1.57079632679489661923f)), gtHalf); - // if (!positive) x = -x - // - x = vec_sel((vec_float4)vec_xor(vec_splatsi4(0x80000000),(vec_int4)x),x,positive); + // if (!positive) x = -x + // + x = vec_sel((vector float)vec_xor(__vec_splatsi4(0x80000000), (vector signed int)x), + x, positive); - return x; + return x; } +#endif diff --git a/Extras/simdmathlibrary/ppu/atan2f4.c b/Extras/simdmathlibrary/ppu/simdmath/atan2f4.h similarity index 69% rename from Extras/simdmathlibrary/ppu/atan2f4.c rename to Extras/simdmathlibrary/ppu/simdmath/atan2f4.h index 72cda6833..d201e3595 100644 --- a/Extras/simdmathlibrary/ppu/atan2f4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/atan2f4.h @@ -27,35 +27,41 @@ POSSIBILITY OF SUCH DAMAGE. 
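The asinf4 kernel above splits the domain at 0.5: below it, a rational approximation in g = x*x is used directly; above it, the identity asin(x) = pi/2 - 2*asin(sqrt((1 - x)/2)) maps the argument back into the small-angle range, which is where g = 0.5 - 0.5*|x| and x <- -2*sqrt(g) come from. A scalar model with the diff's coefficients (editor's sketch):

#include <math.h>

static float asinf4_model(float x)
{
    int   neg = signbit(x);
    float xa  = fabsf(x);
    int   big = xa > 0.5f;
    float g;
    if (big) {             /* asin(x) = pi/2 - 2*asin(sqrt((1 - x)/2)) */
        g  = 0.5f - 0.5f * xa;
        xa = -2.0f * sqrtf(g);
    } else {
        g  = xa * xa;
    }
    /* rational core, same coefficients as the diff */
    float num   = xa * g * (-0.504400557f * g + 0.933933258f);
    float denom = (g - 5.54846723f) * g + 5.603603363f;
    float r = xa + num / denom;
    if (big) r += 1.57079632679489661923f;
    return neg ? -r : r;
}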
*/ +#ifndef ___SIMD_MATH_ATAN2F4_H___ +#define ___SIMD_MATH_ATAN2F4_H___ + #include #include -#include "common-types.h" - +#include // // Inverse tangent function of two variables // -vector float -atan2f4 (vector float y, vector float x) +static inline vector float +_atan2f4 (vector float y, vector float x) { - vec_float4 res = atanf4(divf4(y,x)); + vector float res = _atanf4(divf4(y,x)); - // Use the arguments to determine the quadrant of the result: - // if (x < 0) - // if (y < 0) - // res = -PI + res - // else - // res = PI + res - // - vec_uint4 yNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,y); - vec_uint4 xNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,x); + // Use the arguments to determine the quadrant of the result: + // if (x < 0) + // if (y < 0) + // res = -PI + res + // else + // res = PI + res + // + vector unsigned int yNeg = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.0f), y); + vector unsigned int xNeg = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.0f) ,x); - vec_float4 bias = vec_sel(vec_splatsf4(3.14159265358979323846f),vec_splatsf4(-3.14159265358979323846f),yNeg); + vector float bias = + vec_sel(__vec_splatsf4(3.14159265358979323846f), + __vec_splatsf4(-3.14159265358979323846f), yNeg); - vec_float4 newRes = vec_add(bias, res); + vector float newRes = vec_add(bias, res); - res = vec_sel(res,newRes,xNeg); + res = vec_sel(res,newRes,xNeg); - return res; + return res; } + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/atanf4.h b/Extras/simdmathlibrary/ppu/simdmath/atanf4.h new file mode 100644 index 000000000..a495ecb04 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/atanf4.h @@ -0,0 +1,87 @@ +/* atanf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ATANF4_H___ +#define ___SIMD_MATH_ATANF4_H___ + +#include +#include + +#include + +// +// Computes the inverse tangent of all four slots of x. 
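The atan2f4 kernel above reduces to _atanf4(y/x) and then repairs the quadrant: when x < 0 the raw arctangent lands in the wrong half-plane, so pi is added or subtracted according to the sign of y. As a scalar sketch:

#include <math.h>

static float atan2f4_model(float y, float x)
{
    float res = atanf(y / x);       /* the diff uses _atanf4(divf4(y, x)) */
    if (x < 0.0f)                   /* wrong half-plane: shift by +/- pi  */
        res += (y < 0.0f) ? -3.14159265358979323846f
                          :  3.14159265358979323846f;
    return res;
}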
+// +static inline vector float +_atanf4 (vector float x) +{ + vector float bias; + vector float x2, x3, x4, x8, x9; + vector float hi, lo; + vector float result; + vector float inv_x; + vector unsigned int sign; + vector unsigned int select; + vector float xabs; + vector float vzero = __vec_splatsf4(0.0f); + + sign = vec_and((vector unsigned int)x, __vec_splatsu4(0x80000000)); + xabs = (vector float)vec_andc((vector unsigned int)x, __vec_splatsu4(0x80000000)); + inv_x = _recipf4(x); + inv_x = (vector float)vec_xor((vector unsigned int)inv_x, __vec_splatsu4(0x80000000)); + select = (vector unsigned int)vec_cmpgt(xabs, __vec_splatsf4(1.0f)); + bias = (vector float)vec_or(sign, (vector unsigned int)(__vec_splatsf4(1.57079632679489661923f))); + bias = (vector float)vec_and((vector unsigned int)bias, select); + + x = vec_sel(x, inv_x, select); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polynomial is evaluated + * in two halves (hi and lo). + */ + bias = vec_add(bias, x); + x2 = vec_madd(x, x, vzero); + x3 = vec_madd(x2, x, vzero); + x4 = vec_madd(x2, x2, vzero); + x8 = vec_madd(x4, x4, vzero); + x9 = vec_madd(x8, x, vzero); + hi = vec_madd(__vec_splatsf4(0.0028662257), x2, __vec_splatsf4(-0.0161657367)); + hi = vec_madd(hi, x2, __vec_splatsf4(0.0429096138)); + hi = vec_madd(hi, x2, __vec_splatsf4(-0.0752896400)); + hi = vec_madd(hi, x2, __vec_splatsf4(0.1065626393)); + lo = vec_madd(__vec_splatsf4(-0.1420889944), x2, __vec_splatsf4(0.1999355085)); + lo = vec_madd(lo, x2, __vec_splatsf4(-0.3333314528)); + lo = vec_madd(lo, x3, bias); + + result = vec_madd(hi, x9, lo); + return result; +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/cbrtf4.h b/Extras/simdmathlibrary/ppu/simdmath/cbrtf4.h new file mode 100644 index 000000000..3546b40e2 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/cbrtf4.h @@ -0,0 +1,97 @@ +/* cbrtf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
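For |x| > 1 the atanf4 kernel above substitutes atan(x) = sign(x)*pi/2 + atan(-1/x), folding the +/-pi/2 into bias via the sign bit, then evaluates a degree-17 odd polynomial split into hi and lo halves. A scalar model with the same coefficients, where 1/x stands in for _recipf4's estimate (editor's sketch):

#include <math.h>

static float atanf4_model(float x)
{
    float bias = 0.0f;
    if (fabsf(x) > 1.0f) {          /* atan(x) = sign(x)*pi/2 + atan(-1/x) */
        bias = copysignf(1.57079632679489661923f, x);
        x = -1.0f / x;
    }
    float x2 = x * x, x3 = x2 * x, x9 = x3 * x3 * x3;
    float hi = (((0.0028662257f * x2 - 0.0161657367f) * x2 + 0.0429096138f)
                                 * x2 - 0.0752896400f) * x2 + 0.1065626393f;
    float lo = ((-0.1420889944f * x2 + 0.1999355085f) * x2 - 0.3333314528f)
                                 * x3 + (bias + x);
    return hi * x9 + lo;            /* hi covers degrees 9..17, lo 1..7 */
}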
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_CBRTF4_H___ +#define ___SIMD_MATH_CBRTF4_H___ + +#include +#include + +#include +#include +#include + +static inline vector signed int +__cbrtf4_calc_quot(vector signed int n) +{ + vector signed int quot; + vector unsigned int negxexpmask = (vector unsigned int)vec_cmpgt(__vec_splatsi4(0), n); + n = vec_sel(n, vec_add(n, __vec_splatsi4(2)), negxexpmask); + + quot = vec_add(vec_sra(n, __vec_splatsu4(2)), vec_sra(n, __vec_splatsu4(4))); + quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(4))); + quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(8))); + quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(16))); + vector signed int r = vec_sub(vec_sub(n,quot), vec_sl(quot, __vec_splatsu4(1))); + quot = vec_add(quot, + vec_sra(vec_add(vec_add(r, __vec_splatsi4(5)), + vec_sl (r, __vec_splatsu4(2))), + __vec_splatsu4(4))); + + return quot; +} + +#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3) +#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3) + +static inline vector float +_cbrtf4 (vector float x) +{ + vector float zeros = __vec_splatsf4(0.0f); + vector signed int xexp; + vector float sgnmask = (vector float)__vec_splatsi4(0x80000000); + vector unsigned int negmask = (vector unsigned int)vec_cmpgt(zeros, x); + x = vec_andc(x, sgnmask); + + x = _frexpf4(x, &xexp); + vector float p = + vec_madd(vec_madd(x, __vec_splatsf4(-0.191502161678719066f), __vec_splatsf4(0.697570460207922770f)), + x, + __vec_splatsf4(0.492659620528969547f)); + vector float p3 = vec_madd(p, vec_madd(p, p, zeros), zeros); + + vector signed int quot = __cbrtf4_calc_quot(xexp); + // mod = xexp - 3*quotient + vector signed int modval = vec_sub(vec_sub(xexp,quot), vec_sl(quot, __vec_splatsu4(1))); + vector float factor = __vec_splatsf4(1.0/__CBRTF_sqr_cbrt2); + factor = vec_sel(factor, __vec_splatsf4(1.0/__CBRTF_cbrt2), vec_cmpeq(modval, __vec_splatsi4(-1))); + factor = vec_sel(factor, __vec_splatsf4( 1.0), vec_cmpeq(modval, __vec_splatsi4( 0))); + factor = vec_sel(factor, __vec_splatsf4( __CBRTF_cbrt2), vec_cmpeq(modval, __vec_splatsi4( 1))); + factor = vec_sel(factor, __vec_splatsf4(__CBRTF_sqr_cbrt2), vec_cmpeq(modval, __vec_splatsi4( 2))); + + vector float pre = vec_madd(p, factor, zeros); + vector float numr = vec_madd(x , __vec_splatsf4(2.0f), p3); + vector float denr = vec_madd(p3, __vec_splatsf4(2.0f), x ); + vector float res = vec_madd(pre, _divf4(numr, denr), zeros); + res = _ldexpf4(res, quot); + + return vec_sel(res, vec_or(res,sgnmask), negmask); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/ceilf4.h b/Extras/simdmathlibrary/ppu/simdmath/ceilf4.h new file mode 100644 index 000000000..9235a94ec --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/ceilf4.h @@ -0,0 +1,42 @@ +/* ceilf4 - for each of four float slots, round up to smallest integer not less than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
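The cbrtf4 kernel above works on x = m * 2^n from frexp: a quadratic gives an initial guess p ~ cbrt(m), __cbrtf4_calc_quot divides the exponent by 3 using only shifts and adds (quotient and remainder without a hardware divide), the remainder selects a 2^(mod/3) scale factor, and one Halley step p*(2m + p^3)/(2p^3 + m) polishes the guess before ldexp restores the exponent. A scalar model, where C's / and % with a sign fix-up replace the shift trick (editor's sketch):

#include <math.h>

static float cbrtf4_model(float x)
{
    if (x == 0.0f) return 0.0f;
    int   neg = signbit(x);
    int   n;
    float m = frexpf(fabsf(x), &n);        /* |x| = m * 2^n, m in [0.5, 1) */
    /* initial guess for cbrt(m), same quadratic as the diff */
    float p = (-0.191502161678719066f * m + 0.697570460207922770f) * m
              + 0.492659620528969547f;
    int quot = n / 3, mod = n % 3;         /* diff: shift-and-add quotient */
    if (mod < 0) { mod += 3; quot -= 1; }  /* normalize mod into {0,1,2}   */
    static const float factor[3] = { 1.0f,
                                     1.25992104989487316f,    /* 2^(1/3) */
                                     1.58740105196819947f };  /* 2^(2/3) */
    float p3  = p * p * p;
    /* one Halley step: cbrt(m) ~ p * (2m + p^3) / (2p^3 + m) */
    float res = (p * factor[mod]) * (2.0f * m + p3) / (2.0f * p3 + m);
    res = ldexpf(res, quot);
    return neg ? -res : res;
}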
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_CEILF4_H___ +#define ___SIMD_MATH_CEILF4_H___ + +#include +#include + +static inline vector float +_ceilf4(vector float x) +{ + return vec_ceil( x ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/copysignf4.h b/Extras/simdmathlibrary/ppu/simdmath/copysignf4.h new file mode 100644 index 000000000..ad16c84fd --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/copysignf4.h @@ -0,0 +1,45 @@ +/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_COPYSIGNF4_H___ +#define ___SIMD_MATH_COPYSIGNF4_H___ + +#include +#include + +#include + + +static inline vector float +_copysignf4(vector float x, vector float y) +{ + return vec_sel(x, y, __vec_splatsu4(0x80000000)); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/cosf4.h b/Extras/simdmathlibrary/ppu/simdmath/cosf4.h new file mode 100644 index 000000000..d774a8392 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/cosf4.h @@ -0,0 +1,107 @@ +/* cosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_COSF4_H___ +#define ___SIMD_MATH_COSF4_H___ + +#include +#include + +#include +#include + + +// +// Computes the cosine of each of the four slots +// by using a polynomial approximation. 
+// +static inline vector float +_cosf4 (vector float x) +{ + vector float xl,xl2,xl3,res; + vector signed int q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, __vec_splatsf4(0.63661977236f), __vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl, vec_sel(__vec_splatsf4(0.5f), xl, __vec_splatsu4(0x80000000))); + q = vec_cts(xl, 0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vector signed int offset = vec_add(__vec_splatsi4(1), vec_and(q, __vec_splatsi4(0x3))); + + // Remainder in range [-pi/4..pi/4] + // + vector float qf = vec_ctf(q,0); + vector float p1 = vec_nmsub(qf, __vec_splatsf4(__SINCOSF_KC1), x); + xl = vec_nmsub(qf, __vec_splatsf4(__SINCOSF_KC2), p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl, xl, __vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2, xl, __vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vector float ct1 = vec_madd(__vec_splatsf4(__SINCOSF_CC0), xl2, __vec_splatsf4(__SINCOSF_CC1)); + vector float st1 = vec_madd(__vec_splatsf4(__SINCOSF_SC0), xl2, __vec_splatsf4(__SINCOSF_SC1)); + + vector float ct2 = vec_madd(ct1, xl2, __vec_splatsf4(__SINCOSF_CC2)); + vector float st2 = vec_madd(st1, xl2, __vec_splatsf4(__SINCOSF_SC2)); + + vector float cx = vec_madd(ct2, xl2, __vec_splatsf4(1.0f)); + vector float sx = vec_madd(st2, xl3, xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vector unsigned int mask1 = + (vector unsigned int)vec_cmpeq(vec_and(offset, __vec_splatsi4(0x1)), __vec_splatsi4(0)); + res = vec_sel(cx, sx, mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vector unsigned int mask2 = + (vector unsigned int)vec_cmpeq(vec_and(offset, __vec_splatsi4(0x2)), __vec_splatsi4(0)); + res = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000U), (vector unsigned int)res), res, mask2); + + return res; +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/divf4.c b/Extras/simdmathlibrary/ppu/simdmath/divf4.h similarity index 80% rename from Extras/simdmathlibrary/ppu/divf4.c rename to Extras/simdmathlibrary/ppu/simdmath/divf4.h index b4e71cab0..67d7dae97 100644 --- a/Extras/simdmathlibrary/ppu/divf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/divf4.h @@ -27,21 +27,25 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_DIVF4_H___ +#define ___SIMD_MATH_DIVF4_H___ + #include #include -#include "common-types.h" +#include -vector float -divf4 (vector float numer, vector float denom) +static inline vector float +_divf4 (vector float numer, vector float denom) { - // Reciprocal estimate and 1 Newton-Raphson iteration. + // Reciprocal estimate and 1 Newton-Raphson iteration. 
- vector float y0, y0numer; + vector float y0, y0numer; - y0 = vec_re( denom ); - y0numer = vec_madd( numer, y0, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); - return vec_madd( vec_nmsub( denom, y0, ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f}) ), y0numer, y0numer ); + y0 = vec_re(denom); + y0numer = vec_madd(numer, y0, __vec_splatsf4(0.0f)); + return vec_madd(vec_nmsub(denom, y0, __vec_splatsf4(1.0f)), y0numer, y0numer); } +#endif diff --git a/Extras/simdmathlibrary/ppu/divi4.c b/Extras/simdmathlibrary/ppu/simdmath/divi4.h similarity index 51% rename from Extras/simdmathlibrary/ppu/divi4.c rename to Extras/simdmathlibrary/ppu/simdmath/divi4.h index 714bb02d9..8290cfb63 100644 --- a/Extras/simdmathlibrary/ppu/divi4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/divi4.h @@ -27,63 +27,64 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_DIVI4_H___ +#define ___SIMD_MATH_DIVI4_H___ + #include #include -#include "common-types.h" - - +#include // divi4 - for each of four integer slots, compute quotient and remainder of numer/denom // and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator. -divi4_t -divi4 (vec_int4 numer, vec_int4 denom ) +static inline divi4_t +_divi4 (vector signed int numer, vector signed int denom ) { - vec_int4 minusone = vec_splatsi4(-1); - vec_uint4 zero = vec_splatsu4(0); - vec_uint4 one = vec_splatsu4(1); - vec_uint4 k158 = vec_splatsu4(158); - vec_uint4 k23 = vec_splatsu4(23); + vector signed int minusone = __vec_splatsi4(-1); + vector unsigned int zero = __vec_splatsu4(0); + vector unsigned int one = __vec_splatsu4(1); + vector unsigned int k158 = __vec_splatsu4(158); + vector unsigned int k23 = __vec_splatsu4(23); - divi4_t res; - vec_uint4 numerPos, denomPos, quotNeg; - vec_uint4 numerAbs, denomAbs; - vec_uint4 denomZeros, numerZeros, shift, denomShifted, oneShifted; - vec_uint4 quot, newQuot, skip, newNum, cont; - int anyCont; + divi4_t res; + vector unsigned int numerPos, denomPos, quotNeg; + vector unsigned int numerAbs, denomAbs; + vector unsigned int denomZeros, numerZeros, shift, denomShifted, oneShifted; + vector unsigned int quot, newQuot, skip, newNum, cont; + int anyCont; - // determine whether result needs sign change + // determine whether result needs sign change - numerPos = (vec_uint4)vec_cmpgt( numer, minusone ); - denomPos = (vec_uint4)vec_cmpgt( denom, minusone ); - quotNeg = vec_xor( numerPos, denomPos ); + numerPos = (vector unsigned int)vec_cmpgt( numer, minusone ); + denomPos = (vector unsigned int)vec_cmpgt( denom, minusone ); + quotNeg = vec_xor( numerPos, denomPos ); - // use absolute values of numerator, denominator + // use absolute values of numerator, denominator - numerAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, numer ), numer, numerPos ); - denomAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, denom ), denom, denomPos ); + numerAbs = (vector unsigned int)vec_sel( vec_sub( (vector signed int)zero, numer ), numer, numerPos ); + denomAbs = (vector unsigned int)vec_sel( vec_sub( (vector signed int)zero, denom ), denom, denomPos ); - // get difference of leading zeros to align denom with numer + // get difference of leading zeros to align denom with numer - denomZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( denomAbs, 0 ), k23 ) ); - numerZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( numerAbs, 0 ), k23 ) ); + denomZeros = vec_sub( k158, vec_sr( (vector unsigned int)vec_ctf( denomAbs, 0 ), k23 ) ); + numerZeros = vec_sub( k158, vec_sr( (vector unsigned int)vec_ctf( numerAbs, 0 ), k23 ) ); - shift = 
vec_sub( denomZeros, numerZeros ); - denomShifted = vec_sl( denomAbs, shift ); - oneShifted = vec_sl( one, shift ); - oneShifted = vec_sel( oneShifted, zero, vec_or( vec_cmpeq( denomAbs, zero ), - vec_cmpgt( denomAbs, numerAbs ) ) ); + shift = vec_sub( denomZeros, numerZeros ); + denomShifted = vec_sl( denomAbs, shift ); + oneShifted = vec_sl( one, shift ); + oneShifted = vec_sel( oneShifted, zero, vec_or( vec_cmpeq( denomAbs, zero ), + vec_cmpgt( denomAbs, numerAbs ) ) ); - // long division + // long division - quot = zero; + quot = zero; - do - { - cont = (vec_uint4)vec_cmpgt( oneShifted, zero ); + do + { + cont = (vector unsigned int)vec_cmpgt( oneShifted, zero ); anyCont = vec_any_gt( oneShifted, zero ); - skip = (vec_uint4)vec_cmpgt( denomShifted, numerAbs ); + skip = (vector unsigned int)vec_cmpgt( denomShifted, numerAbs ); newQuot = vec_or( quot, oneShifted ); newNum = vec_sub( numerAbs, denomShifted ); @@ -93,11 +94,12 @@ divi4 (vec_int4 numer, vec_int4 denom ) quot = vec_sel( newQuot, quot, skip ); numerAbs = vec_sel( numerAbs, newNum, vec_andc( cont, skip ) ); - } - while ( anyCont ); + } + while ( anyCont ); - res.quot = (vec_int4)vec_sel( quot, vec_sub( zero, quot ), quotNeg ); - res.rem = (vec_int4)vec_sel( (vec_uint4)vec_sub( (vec_int4)zero, (vec_int4)numerAbs ), numerAbs, numerPos ); - return res; + res.quot = (vector signed int)vec_sel( quot, vec_sub( zero, quot ), quotNeg ); + res.rem = (vector signed int)vec_sel( (vector unsigned int)vec_sub( (vector signed int)zero, (vector signed int)numerAbs ), numerAbs, numerPos ); + return res; } +#endif diff --git a/Extras/simdmathlibrary/ppu/exp2f4.c b/Extras/simdmathlibrary/ppu/simdmath/exp2f4.h similarity index 63% rename from Extras/simdmathlibrary/ppu/exp2f4.c rename to Extras/simdmathlibrary/ppu/simdmath/exp2f4.h index 8fe74b55a..6a4bc0f19 100644 --- a/Extras/simdmathlibrary/ppu/exp2f4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/exp2f4.h @@ -27,15 +27,18 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_EXP2F4_H___ +#define ___SIMD_MATH_EXP2F4_H___ + #include #include #include -#include "common-types.h" +#include /* * FUNCTION - * vec_float4 _exp2_v(vec_float4 x) + * vector float _exp2_v(vector float x) * * DESCRIPTION * _exp2_v computes 2 raised to the input vector x. Computation is @@ -73,41 +76,37 @@ */ -#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */ +#define __EXP2F_LN2 0.69314718055995f /* ln(2) */ -vector float -exp2f4 (vector float x) +static inline vector float +_exp2f4 (vector float x) { - vec_int4 ix; - vec_uint4 overflow; - vec_uint4 underflow; - vec_float4 frac, frac2, frac4; - vec_float4 exp_int, exp_frac; - vec_float4 result; - vec_float4 hi, lo; - vec_float4 zeros = vec_splatsf4(0.0f); - vec_float4 bias; + vector signed int ix; + vector unsigned int overflow; + vector unsigned int underflow; + vector float frac, frac2, frac4; + vector float exp_int, exp_frac; + vector float result; + vector float hi, lo; + vector float zeros = __vec_splatsf4(0.0f); + vector float bias; /* Break in the input x into two parts ceil(x), x - ceil(x). 
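Two division kernels appear above, since these vector units have no divide instruction. divf4 refines the hardware reciprocal estimate vec_re(d) with one Newton-Raphson step, (1 - d*y0)*(n*y0) + n*y0; divi4 performs binary restoring division, aligning |denom| under |numer| by the difference in leading-zero counts (extracted from the exponent field of a float conversion, 158 - (bits >> 23)) and then subtracting and shifting. A scalar model of both, where gcc's __builtin_clz replaces the float-exponent trick and 1.0f/d the vec_re estimate (editor's sketch):

static float divf4_model(float n, float d)
{
    float y0 = 1.0f / d;                  /* reciprocal estimate            */
    float q0 = n * y0;
    return (1.0f - d * y0) * q0 + q0;     /* one Newton-Raphson refinement  */
}

typedef struct { int quot, rem; } divi_t;

static divi_t divi4_model(int numer, int denom)
{
    unsigned na = numer < 0 ? -(unsigned)numer : (unsigned)numer;
    unsigned da = denom < 0 ? -(unsigned)denom : (unsigned)denom;
    int quot_neg = (numer < 0) != (denom < 0);
    unsigned quot = 0;

    /* divide by zero leaves quot = 0 and rem = numer, per the contract */
    if (da != 0 && da <= na) {
        int shift = __builtin_clz(da) - __builtin_clz(na);
        unsigned ds  = da << shift;       /* align divisor under dividend  */
        unsigned one = 1u << shift;
        while (one != 0) {                /* restoring long division       */
            if (ds <= na) { quot |= one; na -= ds; }
            ds >>= 1; one >>= 1;
        }
    }
    divi_t r;
    r.quot = quot_neg ? -(int)quot : (int)quot;
    r.rem  = numer < 0 ? -(int)na : (int)na;   /* remainder keeps numer's sign */
    return r;
}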
*/ #if 1 - bias = (vec_float4)(vec_sra((vec_int4)x, vec_splatsu4(31) )); - bias = (vec_float4)(vec_andc(vec_splatsu4(0x3F7FFFFF), (vec_uint4)bias)); + bias = (vector float)(vec_sra((vector signed int)x, __vec_splatsu4(31) )); + bias = (vector float)(vec_andc(__vec_splatsu4(0x3F7FFFFF), (vector unsigned int)bias)); ix = vec_cts(vec_add(x, bias), 0); #else - bias = vec_sel(vec_floor(x), vec_ceil(x), vec_cmpgt(x, vec_splatsf4(0.0f))); + bias = vec_sel(vec_floor(x), vec_ceil(x), vec_cmpgt(x, __vec_splatsf4(0.0f))); ix = vec_cts(bias, 0); #endif frac = vec_sub(vec_ctf(ix, 0), x); - frac = vec_madd(frac, vec_splatsf4(_EXP2F_H_LN2), zeros); + frac = vec_madd(frac, __vec_splatsf4(__EXP2F_LN2), zeros); - // !!! HRD Changing weird un-understandable and incorrect overflow handling code - //overflow = vec_sel((vec_uint4)(0x7FFFFFFF), (vec_uint4)x, (vec_uint4)(0x80000000) ); - overflow = (vec_uint4)vec_cmpgt(x, (vec_float4)(vec_splatsi4(0x4300FFFF))); // !!! Biggest possible exponent to fit in range. - underflow = (vec_uint4)vec_cmpgt(vec_splatsf4(-126.0f), x); + overflow = (vector unsigned int)vec_cmpgt(x, (vector float)(__vec_splatsi4(0x4300FFFF))); // !!! Biggest possible exponent to fit in range. + underflow = (vector unsigned int)vec_cmpgt(__vec_splatsf4(-126.0f), x); - //exp_int = (vec_float4)(vec_sl(vec_add(ix, (vec_int4)(127)), (vec_uint4)(23))); // !!! HRD <- changing this to correct for - // !!! overflow (x >= 127.999999f) - exp_int = (vec_float4)(vec_sl(vec_add(ix, vec_splatsi4(126)), vec_splatsu4(23))); // !!! HRD <- add with saturation + exp_int = (vector float)(vec_sl(vec_add(ix, __vec_splatsi4(126)), __vec_splatsu4(23))); // !!! HRD <- add with saturation /* Instruction counts can be reduced if the polynomial was * computed entirely from nested (dependent) fma's. However, @@ -117,22 +116,22 @@ exp2f4 (vector float x) frac2 = vec_madd(frac, frac, zeros); frac4 = vec_madd(frac2, frac2, zeros); - hi = vec_madd(frac, vec_splatsf4(-0.0001413161), vec_splatsf4(0.0013298820)); - hi = vec_madd(frac, hi, vec_splatsf4(-0.0083013598)); - hi = vec_madd(frac, hi, vec_splatsf4(0.0416573475)); - lo = vec_madd(frac, vec_splatsf4(-0.1666653019), vec_splatsf4(0.4999999206)); - lo = vec_madd(frac, lo, vec_splatsf4(-0.9999999995)); - lo = vec_madd(frac, lo, vec_splatsf4(1.0)); + hi = vec_madd(frac, __vec_splatsf4(-0.0001413161), __vec_splatsf4(0.0013298820)); + hi = vec_madd(frac, hi, __vec_splatsf4(-0.0083013598)); + hi = vec_madd(frac, hi, __vec_splatsf4(0.0416573475)); + lo = vec_madd(frac, __vec_splatsf4(-0.1666653019), __vec_splatsf4(0.4999999206)); + lo = vec_madd(frac, lo, __vec_splatsf4(-0.9999999995)); + lo = vec_madd(frac, lo, __vec_splatsf4(1.0)); exp_frac = vec_madd(frac4, hi, lo); - //ix = vec_add(ix, vec_sr((vec_int4)(exp_frac), (vec_uint4)(23) )); result = vec_madd(exp_frac, exp_int, zeros); result = vec_madd(exp_frac, exp_int, result); // !!! 
HRD /* Handle overflow */ - result = vec_sel(result, vec_splatsf4(HUGE_VALF), overflow); + result = vec_sel(result, __vec_splatsf4(HUGE_VALF), overflow); result = vec_sel(result, zeros, underflow); - //result = vec_sel(result, (vec_float4)(overflow), vec_cmpgt((vec_uint4)(ix), (vec_uint4)(255))); return (result); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/expf4.c b/Extras/simdmathlibrary/ppu/simdmath/expf4.h similarity index 59% rename from Extras/simdmathlibrary/ppu/expf4.c rename to Extras/simdmathlibrary/ppu/simdmath/expf4.h index 00540fe8d..ccdf071c4 100644 --- a/Extras/simdmathlibrary/ppu/expf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/expf4.h @@ -27,39 +27,43 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_EXPF4_H___ +#define ___SIMD_MATH_EXPF4_H___ + #include #include -#include "common-types.h" +#include +#include -#define _EXPF_H_C1 ((float)-0.6931470632553101f) -#define _EXPF_H_C2 ((float)-1.1730463525082e-7f) +#define __EXPF_C1 -0.6931470632553101f +#define __EXPF_C2 -1.1730463525082e-7f -#define _EXPF_H_INVLN2 ((float)1.4426950408889634f) +#define __EXPF_INVLN2 1.4426950408889634f -vector float -expf4 (vector float x) +static inline vector float +_expf4 (vector float x) { - vec_float4 zeros = vec_splatsf4(0.0f); - vec_uint4 xnegmask = (vec_uint4)vec_cmpgt(zeros, x); - vec_float4 goffset = vec_sel(vec_splatsf4( 0.5f),vec_splatsf4(-0.5f),xnegmask); - vec_float4 g = vec_madd(x, vec_splatsf4(_EXPF_H_INVLN2), zeros); - vec_int4 xexp = vec_cts(vec_add(g, goffset),0); + vector float zeros = __vec_splatsf4(0.0f); + vector unsigned int xnegmask = (vector unsigned int)vec_cmpgt(zeros, x); + vector float goffset = vec_sel(__vec_splatsf4( 0.5f),__vec_splatsf4(-0.5f),xnegmask); + vector float g = vec_madd(x, __vec_splatsf4(__EXPF_INVLN2), zeros); + vector signed int xexp = vec_cts(vec_add(g, goffset),0); g = vec_ctf(xexp, 0); - g = vec_madd(g, vec_splatsf4(_EXPF_H_C2), vec_madd(g, vec_splatsf4(_EXPF_H_C1), x)); - vec_float4 z = vec_madd(g, g, zeros); - vec_float4 a = vec_madd(z, vec_splatsf4(0.0999748594f), zeros); - vec_float4 b = vec_madd(g, - vec_madd(z, - vec_splatsf4(0.0083208258f), - vec_splatsf4(0.4999999992f) - ), - zeros); + g = vec_madd(g, __vec_splatsf4(__EXPF_C2), vec_madd(g, __vec_splatsf4(__EXPF_C1), x)); + vector float z = vec_madd(g, g, zeros); + vector float a = vec_madd(z, __vec_splatsf4(0.0999748594f), zeros); + vector float b = vec_madd(g, + vec_madd(z, + __vec_splatsf4(0.0083208258f), + __vec_splatsf4(0.4999999992f)), + zeros); - vec_float4 foo = divf4(vec_add(vec_splatsf4(1.0f), vec_add(a, b)), - vec_add(vec_splatsf4(1.0f), vec_sub(a, b))); - - return ldexpf4(foo, xexp); + vector float foo = _divf4(vec_add(__vec_splatsf4(1.0f), vec_add(a, b)), + vec_add(__vec_splatsf4(1.0f), vec_sub(a, b))); + return _ldexpf4(foo, xexp); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/expm1f4.c b/Extras/simdmathlibrary/ppu/simdmath/expm1f4.h similarity index 66% rename from Extras/simdmathlibrary/ppu/expm1f4.c rename to Extras/simdmathlibrary/ppu/simdmath/expm1f4.h index d81942f00..4bc566fe4 100644 --- a/Extras/simdmathlibrary/ppu/expm1f4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/expm1f4.h @@ -27,31 +27,37 @@ POSSIBILITY OF SUCH DAMAGE. 
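The exp2f4 kernel above splits x into ix = ceil(x) plus a fractional part, maps the fraction through t = (ix - x)*ln 2 so that 2^x = 2^ix * e^(-t), approximates e^(-t) with a degree-7 polynomial in hi/lo halves, and builds 2^(ix-1) directly from exponent bits; the doubled vec_madd at the end restores the missing factor of two while keeping the bit arithmetic in range. The expf4 that follows uses the same exponent split with a Cody-Waite subtraction and a Pade ratio (1+a+b)/(1+a-b) for e^g. A scalar model of the exp2 path, with ldexpf standing in for the bit construction (editor's sketch):

#include <math.h>

static float exp2f4_model(float x)
{
    if (x > 128.99998f) return HUGE_VALF;  /* 0x4300FFFF clamp in the diff */
    if (x < -126.0f)    return 0.0f;       /* flush underflow to zero      */
    int   ix = (int)ceilf(x);              /* the diff gets ceil via a bias */
    float t  = ((float)ix - x) * 0.69314718055995f;   /* t in [0, ln 2)    */
    /* e^(-t) by a degree-7 polynomial, split into hi/lo as in the diff */
    float t2 = t * t, t4 = t2 * t2;
    float hi = ((-0.0001413161f * t + 0.0013298820f) * t - 0.0083013598f) * t
               + 0.0416573475f;
    float lo = ((-0.1666653019f * t + 0.4999999206f) * t - 0.9999999995f) * t
               + 1.0f;
    float ef = hi * t4 + lo;               /* ~ 2^(x - ix)                  */
    float ei = ldexpf(1.0f, ix - 1);       /* exp_int = 2^(ix-1) from bits  */
    return ef * ei + ef * ei;              /* doubling restores the 2^ix    */
}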
*/ +#ifndef ___SIMD_MATH_EXPM1F4_H___ +#define ___SIMD_MATH_EXPM1F4_H___ + #include #include -#include "common-types.h" +#include -#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f) -#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f) +#define __EXPM1F_ln1by2 -0.6931471805599f +#define __EXPM1F_ln3by2 0.4054651081082f -vector float -expm1f4 (vector float x) +static inline vector float +_expm1f4 (vector float x) { - vec_float4 zeros = vec_splatsf4(0.0f); - vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(_EXPM1F_H_ln1by2)), - vec_cmpgt(vec_splatsf4(_EXPM1F_H_ln3by2), x)); - vec_float4 x2 = vec_madd(x,x,zeros); - vec_float4 d0, d1, n0, n1; + vector float zeros = __vec_splatsf4(0.0f); + vector unsigned int nearzeromask = + (vector unsigned int)vec_and(vec_cmpgt(x, __vec_splatsf4(__EXPM1F_ln1by2)), + vec_cmpgt(__vec_splatsf4(__EXPM1F_ln3by2), x)); + vector float x2 = vec_madd(x,x,zeros); + vector float d0, d1, n0, n1; - d0 = vec_madd(x , vec_splatsf4(-0.3203561199f), vec_splatsf4(0.9483177697f)); - d1 = vec_madd(x2, vec_splatsf4( 0.0326527809f), d0); + d0 = vec_madd(x , __vec_splatsf4(-0.3203561199f), __vec_splatsf4(0.9483177697f)); + d1 = vec_madd(x2, __vec_splatsf4( 0.0326527809f), d0); - n0 = vec_madd(x , vec_splatsf4(0.1538026623f), vec_splatsf4(0.9483177732f)); - n1 = vec_madd(x , vec_splatsf4(0.0024490478f), vec_splatsf4(0.0305274668f)); + n0 = vec_madd(x , __vec_splatsf4(0.1538026623f), __vec_splatsf4(0.9483177732f)); + n1 = vec_madd(x , __vec_splatsf4(0.0024490478f), __vec_splatsf4(0.0305274668f)); n1 = vec_madd(x2, n1, n0); - return vec_sel(vec_sub(expf4(x), vec_splatsf4(1.0f)), + return vec_sel(vec_sub(expf4(x), __vec_splatsf4(1.0f)), vec_madd(x, divf4(n1, d1), zeros), nearzeromask); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/fabsf4.h b/Extras/simdmathlibrary/ppu/simdmath/fabsf4.h new file mode 100644 index 000000000..e2a3fc953 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/fabsf4.h @@ -0,0 +1,42 @@ +/* fabsf4 - for each of four float slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
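The expm1f4 kernel above guards against the cancellation in exp(x) - 1: inside (ln 1/2, ln 3/2) it evaluates x * n(x)/d(x), a rational approximation that stays accurate near zero, and falls back to the exp kernel outside that window. A scalar model with the same coefficients (editor's sketch):

#include <math.h>

static float expm1f4_model(float x)
{
    /* outside (ln(1/2), ln(3/2)) the plain exp path is accurate enough */
    if (x <= -0.6931471806f || x >= 0.4054651081f)
        return expf(x) - 1.0f;
    /* near zero: rational approximation avoids the cancellation of
       exp(x) - 1; coefficients as in the diff */
    float x2 = x * x;
    float d = (0.0326527809f * x2) + (-0.3203561199f * x + 0.9483177697f);
    float n = (0.0024490478f * x + 0.0305274668f) * x2
              + (0.1538026623f * x + 0.9483177732f);
    return x * (n / d);
}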
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FABSF4_H___ +#define ___SIMD_MATH_FABSF4_H___ + +#include +#include + +static inline vector float +_fabsf4(vector float x) +{ + return vec_abs( x ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/fdimf4.h b/Extras/simdmathlibrary/ppu/simdmath/fdimf4.h new file mode 100644 index 000000000..f6b9407b3 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/fdimf4.h @@ -0,0 +1,45 @@ +/* fdimf - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FDIMF4_H___ +#define ___SIMD_MATH_FDIMF4_H___ + +#include +#include + +#include + +static inline vector float +_fdimf4 (vector float x, vector float y) +{ + vector float diff = vec_sub(x,y); + return vec_sel(__vec_splatsf4(0.0f), diff, vec_cmpgt(x,y)); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/floorf4.h b/Extras/simdmathlibrary/ppu/simdmath/floorf4.h new file mode 100644 index 000000000..099c7a9c6 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/floorf4.h @@ -0,0 +1,43 @@ +/* floorf4 - for each of four float slots, round down to largest integer not greater than the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FLOORF4_H___ +#define ___SIMD_MATH_FLOORF4_H___ + +#include +#include + + +static inline vector float +_floorf4 (vector float x) +{ + return vec_floor( x ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/fmaf4.h b/Extras/simdmathlibrary/ppu/simdmath/fmaf4.h new file mode 100644 index 000000000..08262f010 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/fmaf4.h @@ -0,0 +1,42 @@ +/* fmaf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___SIMD_MATH_FMAF4_H___ +#define ___SIMD_MATH_FMAF4_H___ + +#include +#include + +static inline vector float +_fmaf4 (vector float x, vector float y, vector float z) +{ + return vec_madd(x,y,z); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/fmaxf4.h b/Extras/simdmathlibrary/ppu/simdmath/fmaxf4.h new file mode 100644 index 000000000..e5932d14a --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/fmaxf4.h @@ -0,0 +1,43 @@ +/* fmaxf4 - for each of four float slots, compute maximum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FMAXF4_H___ +#define ___SIMD_MATH_FMAXF4_H___ + +#include +#include + + +static inline vector float +_fmaxf4 (vector float x, vector float y) +{ + return vec_max( x, y ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/fminf4.h b/Extras/simdmathlibrary/ppu/simdmath/fminf4.h new file mode 100644 index 000000000..1bf0f0927 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/fminf4.h @@ -0,0 +1,43 @@ +/* fminf4 - for each of four float slots, compute minimum of x and y + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FMINF4_H___ +#define ___SIMD_MATH_FMINF4_H___ + +#include <simdmath.h> +#include <altivec.h> + + +static inline vector float +_fminf4 (vector float x, vector float y) +{ + return vec_min( x, y ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/fmodf4.c b/Extras/simdmathlibrary/ppu/simdmath/fmodf4.h similarity index 69% rename from Extras/simdmathlibrary/ppu/fmodf4.c rename to Extras/simdmathlibrary/ppu/simdmath/fmodf4.h index 0210fcf88..37292d35e 100644 --- a/Extras/simdmathlibrary/ppu/fmodf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/fmodf4.h @@ -27,56 +27,62 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_FMODF4_H___ +#define ___SIMD_MATH_FMODF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/divf4.h> +#include <simdmath/fabsf4.h> +#include <simdmath/copysignf4.h> // // This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. // If x == 0, the result is 0. // If x != 0 and y == 0, the result is undefined. -vector float -fmodf4 (vector float x, vector float y) +static inline vector float +_fmodf4 (vector float x, vector float y) { - vec_float4 q, xabs, yabs, qabs, xabs2; - vec_int4 qi0, qi1, qi2; - vec_float4 i0, i1, i2, r1, r2, i; - vec_uint4 inrange; + vector float q, xabs, yabs, qabs, xabs2; + vector signed int qi0, qi1, qi2; + vector float i0, i1, i2, r1, r2, i; + vector unsigned int inrange; - // Find i = truncated_integer(|x/y|) + // Find i = truncated_integer(|x/y|) - // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0. - // Thus i is either the truncated quotient, one less, or one greater. + // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0. + // Thus i is either the truncated quotient, one less, or one greater. - q = divf4( x, y ); - xabs = fabsf4( x ); - yabs = fabsf4( y ); - qabs = fabsf4( q ); - xabs2 = vec_add( xabs, xabs ); + q = _divf4( x, y ); + xabs = _fabsf4( x ); + yabs = _fabsf4( y ); + qabs = _fabsf4( q ); + xabs2 = vec_add( xabs, xabs ); - inrange = (vec_uint4)vec_cmpgt( (vec_float4)(vec_splatsu4(0x49800000)), qabs ); + inrange = (vector unsigned int)vec_cmpgt( (vector float)(__vec_splatsu4(0x49800000)), qabs ); - qi1 = vec_cts( qabs, 0 ); - qi0 = vec_add( qi1, ((vec_int4){-1, -1, -1, -1}) ); - qi2 = vec_add( qi1, ((vec_int4){1, 1, 1, 1}) ); + qi1 = vec_cts( qabs, 0 ); + qi0 = vec_add( qi1, __vec_splatsi4(-1) ); + qi2 = vec_add( qi1, __vec_splatsi4(1) ); - i0 = vec_ctf( qi0, 0 ); - i1 = vec_ctf( qi1, 0 ); - i2 = vec_ctf( qi2, 0 ); + i0 = vec_ctf( qi0, 0 ); + i1 = vec_ctf( qi1, 0 ); + i2 = vec_ctf( qi2, 0 ); - // Correct i will be the largest one such that |x| - i*|y| >= 0. + // Correct i will be the largest one such that |x| - i*|y| >= 0.
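+    // A scalar sketch of the same selection, assuming only C99 <math.h>
+    // (illustrative only, not part of the library):
+    //     float q = truncf(fabsf(x / y));
+    //     float i = q - 1.0f;
+    //     if (fabsf(x) - q * fabsf(y) >= 0.0f)          i = q;
+    //     if (fabsf(x) - (q + 1.0f) * fabsf(y) >= 0.0f) i = q + 1.0f;
+    //     return x - copysignf(i, x / y) * y;
+    // The vector code below makes the same choice with branch-free selects.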
- r1 = vec_nmsub( i1, yabs, xabs ); - r2 = vec_nmsub( i2, yabs, xabs ); + r1 = vec_nmsub( i1, yabs, xabs ); + r2 = vec_nmsub( i2, yabs, xabs ); - i = i0; - i = vec_sel( i1, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r1 ) ); - i = vec_sel( i2, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r2 ) ); + i = i0; + i = vec_sel( i1, i, vec_cmpgt( __vec_splatsi4(0), (vector signed int)r1 ) ); + i = vec_sel( i2, i, vec_cmpgt( __vec_splatsi4(0), (vector signed int)r2 ) ); - i = copysignf4( i, q ); + i = _copysignf4( i, q ); - return vec_sel( vec_splatsf4(0.0f), vec_nmsub( i, y, x ), inrange ); + return vec_sel( __vec_splatsf4(0.0f), vec_nmsub( i, y, x ), inrange ); } +#endif diff --git a/Extras/simdmathlibrary/ppu/frexpf4.c b/Extras/simdmathlibrary/ppu/simdmath/frexpf4.h similarity index 71% rename from Extras/simdmathlibrary/ppu/frexpf4.c rename to Extras/simdmathlibrary/ppu/simdmath/frexpf4.h index 61124a37c..0cb809ec7 100644 --- a/Extras/simdmathlibrary/ppu/frexpf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/frexpf4.h @@ -27,31 +27,28 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_FREXPF4_H___ +#define ___SIMD_MATH_FREXPF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> -vector float -frexpf4 (vector float x, vector signed int *exp) +static inline vector float +_frexpf4 (vector float x, vector signed int *exp) { - vec_int4 zeros = (vec_int4){0,0,0,0}; - vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros); + vector signed int zeros = __vec_splatsi4(0); + vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(x, (vector float)zeros); - vec_int4 expmask = vec_splatsi4(0x7F800000); - vec_int4 e1 = vec_and ( (vec_int4)x, expmask); - vec_int4 e2 = vec_sub(vec_sr(e1, vec_splatsu4(23) ), vec_splatsi4(126) ); + vector signed int expmask = __vec_splatsi4(0x7F800000); + vector signed int e1 = vec_and ( (vector signed int)x, expmask); + vector signed int e2 = vec_sub(vec_sr(e1, __vec_splatsu4(23) ), __vec_splatsi4(126) ); *exp = vec_sel(e2, zeros, zeromask); - vec_float4 m2 = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + vector float m2 = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask); - return vec_sel(m2, (vec_float4)zeros, zeromask); + return vec_sel(m2, (vector float)zeros, zeromask); } - -/* -{ - *exp = ((vec_int4)(0)); - return ((vec_float4)(0.0f)); -} -*/ +#endif diff --git a/Extras/simdmathlibrary/ppu/hypotf4.c b/Extras/simdmathlibrary/ppu/simdmath/hypotf4.h similarity index 85% rename from Extras/simdmathlibrary/ppu/hypotf4.c rename to Extras/simdmathlibrary/ppu/simdmath/hypotf4.h index c7677a5b1..1e692c34b 100644 --- a/Extras/simdmathlibrary/ppu/hypotf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/hypotf4.h @@ -27,15 +27,20 @@ POSSIBILITY OF SUCH DAMAGE.
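The hypotf4 hunk that follows keeps the direct formulation: two fused multiply-adds accumulate x*x + y*y, then _sqrtf4 takes the root. In scalar terms (assuming C99 <math.h>, illustrative only) this is sqrtf(x*x + y*y). Note that the direct form can overflow to infinity once |x| or |y| exceeds roughly 2^64, where a scaled scalar hypotf would still return a finite result.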
*/ +#ifndef ___SIMD_MATH_HYPOTF4_H___ +#define ___SIMD_MATH_HYPOTF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/sqrtf4.h> -vector float -hypotf4 (vector float x, vector float y) +static inline vector float +_hypotf4 (vector float x, vector float y) { - vec_float4 sum = vec_madd(x,x, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ); - sum = vec_madd(y,y,sum); - return sqrtf4(sum); + vector float sum = vec_madd( x, x, __vec_splatsf4(0.0f) ); + sum = vec_madd(y, y, sum); + return _sqrtf4(sum); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/ilogbf4.c b/Extras/simdmathlibrary/ppu/simdmath/ilogbf4.h similarity index 76% rename from Extras/simdmathlibrary/ppu/ilogbf4.c rename to Extras/simdmathlibrary/ppu/simdmath/ilogbf4.h index 86c269aeb..2ca828793 100644 --- a/Extras/simdmathlibrary/ppu/ilogbf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/ilogbf4.h @@ -27,21 +27,26 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ILOGBF4_H___ +#define ___SIMD_MATH_ILOGBF4_H___ + #include <simdmath.h> #include <altivec.h> #include <math.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> -vector signed int -ilogbf4 (vector float x) +static inline vector signed int +_ilogbf4 (vector float x) { - vec_int4 minus127 = vec_splatsi4(-127); + vector signed int minus127 = __vec_splatsi4(-127); - vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000)); - vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0)); - vec_int4 e2 = vec_add(vec_sr(e1,vec_splatsu4(23)), minus127); + vector signed int e1 = vec_and((vector signed int)x, __vec_splatsi4(0x7F800000)); + vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(e1, __vec_splatsi4(0)); + vector signed int e2 = vec_add(vec_sr(e1,__vec_splatsu4(23)), minus127); - return vec_sel(e2, vec_splatsi4(FP_ILOGB0), zeromask); + return vec_sel(e2, __vec_splatsi4(FP_ILOGB0), zeromask); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/ldexpf4.c b/Extras/simdmathlibrary/ppu/simdmath/ldexpf4.h similarity index 59% rename from Extras/simdmathlibrary/ppu/ldexpf4.c rename to Extras/simdmathlibrary/ppu/simdmath/ldexpf4.h index b542effca..05a61a2a3 100644 --- a/Extras/simdmathlibrary/ppu/ldexpf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/ldexpf4.h @@ -27,32 +27,37 @@ POSSIBILITY OF SUCH DAMAGE.
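In the ldexpf4 hunk that follows, the IEEE-754 single-precision layout (1 sign bit, 8 exponent bits, 23 mantissa bits) is manipulated directly: scaling by 2^exp amounts to adding exp to the biased exponent field. A scalar sketch of the core idea for normal inputs, assuming C99 <string.h>; the fragment is illustrative only, and the real code below additionally clamps overflow and flushes underflow to zero:

    unsigned int bits;
    memcpy(&bits, &x, sizeof bits);                    /* reinterpret float as bits */
    int e = (int)((bits >> 23) & 0xFF);                /* biased exponent */
    bits = (bits & ~0x7F800000u) | ((unsigned int)(e + exp) << 23);
    memcpy(&x, &bits, sizeof x);                       /* x is now x * 2^exp */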
*/ +#ifndef ___SIMD_MATH_LDEXPF4_H___ +#define ___SIMD_MATH_LDEXPF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> -vector float -ldexpf4 (vector float x, vector signed int exp) +static inline vector float +_ldexpf4 (vector float x, vector signed int exp) { - vec_int4 zeros = vec_splatsi4(0); + vector signed int zeros = __vec_splatsi4(0); - vec_int4 expmask = vec_splatsi4(0x7F800000); - vec_int4 e1 = vec_and((vec_int4)x, expmask); - vec_int4 e2 = vec_sr(e1,vec_splatsu4(23)); + vector signed int expmask = __vec_splatsi4(0x7F800000); + vector signed int e1 = vec_and((vector signed int)x, expmask); + vector signed int e2 = vec_sr(e1,__vec_splatsu4(23)); - vec_uint4 maxmask = (vec_uint4)vec_cmpgt(exp, vec_splatsi4(255)); - vec_uint4 minmask = (vec_uint4)vec_cmpgt(vec_splatsi4(-255), exp); - minmask = vec_or (minmask, (vec_uint4)vec_cmpeq(x, (vec_float4)zeros)); + vector unsigned int maxmask = (vector unsigned int)vec_cmpgt(exp, __vec_splatsi4(255)); + vector unsigned int minmask = (vector unsigned int)vec_cmpgt(__vec_splatsi4(-255), exp); + minmask = vec_or (minmask, (vector unsigned int)vec_cmpeq(x, (vector float)zeros)); - vec_int4 esum = vec_add(e2, exp); + vector signed int esum = vec_add(e2, exp); - maxmask = vec_or (maxmask, (vec_uint4)vec_cmpgt(esum, vec_splatsi4(255))); - maxmask = vec_and(maxmask, vec_splatsu4(0x7FFFFFFF)); - minmask = vec_or (minmask, (vec_uint4)vec_cmpgt(zeros, esum)); + maxmask = vec_or (maxmask, (vector unsigned int)vec_cmpgt(esum, __vec_splatsi4(255))); + maxmask = vec_and(maxmask, __vec_splatsu4(0x7FFFFFFF)); + minmask = vec_or (minmask, (vector unsigned int)vec_cmpgt(zeros, esum)); - x = vec_sel(x, (vec_float4)vec_sl(esum,vec_splatsu4(23)), (vec_uint4)expmask); - x = vec_sel(x, (vec_float4)zeros, minmask); - x = vec_sel(x, (vec_float4)maxmask, maxmask); + x = vec_sel(x, (vector float)vec_sl(esum,__vec_splatsu4(23)), (vector unsigned int)expmask); + x = vec_sel(x, (vector float)zeros, minmask); + x = vec_sel(x, (vector float)maxmask, maxmask); return x; } + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/log10f4.h b/Extras/simdmathlibrary/ppu/simdmath/log10f4.h new file mode 100644 index 000000000..9eeaf381e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/log10f4.h @@ -0,0 +1,83 @@ +/* log10f4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_LOG10F4_H___ +#define ___SIMD_MATH_LOG10F4_H___ + +#include <simdmath.h> +#include <altivec.h> + +#include <simdmath/divf4.h> + +#define __LOG10F_loga2msb 0.3010299205780f +#define __LOG10F_loga2lsb 7.5085978266e-8f +#define __LOG10F_logaemsb 0.4342944622040f +#define __LOG10F_logaelsb 1.9699272335e-8f +#define __LOG10F_neglogae -0.4342944819033f +​ +#define __LOG10F_c0 -0.2988439998f +#define __LOG10F_c1 -0.3997655209f +#define __LOG10F_c2 -0.6666679125f + +static inline vector float +_log10f4 (vector float x) +{ + vector signed int zeros = __vec_splatsi4(0); + vector float ones = __vec_splatsf4(1.0f); + + vector signed int expmask = __vec_splatsi4(0x7F800000); + vector signed int xexp = + vec_add( vec_sr(vec_and((vector signed int)x, expmask), __vec_splatsu4(23)), __vec_splatsi4(-126) ); + x = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask); + + vector unsigned int mask = (vector unsigned int)vec_cmpgt( __vec_splatsf4(0.7071067811865f), x); + x = vec_sel(x , vec_add(x, x) , mask); + xexp = vec_sel(xexp, vec_sub(xexp, __vec_splatsi4(1)), mask); + + vector float x1 = vec_sub(x , ones); + vector float z = _divf4 (x1, vec_add(x, ones)); + vector float w = vec_madd(z , z, (vector float)zeros); + vector float polywneg; + polywneg = vec_madd(__vec_splatsf4(__LOG10F_c0), w, __vec_splatsf4(__LOG10F_c1)); + polywneg = vec_madd(polywneg , w, __vec_splatsf4(__LOG10F_c2)); + + vector float y = vec_madd(z, vec_madd(polywneg, w, x1), (vector float)zeros); + vector float wnew = vec_ctf(xexp,0); + + vector float zz1 = vec_madd(__vec_splatsf4(__LOG10F_logaemsb), x1, + vec_madd(__vec_splatsf4(__LOG10F_loga2msb),wnew,(vector float)zeros)); + vector float zz2 = vec_madd(__vec_splatsf4(__LOG10F_logaelsb), x1, + vec_madd(__vec_splatsf4(__LOG10F_loga2lsb), wnew, + vec_madd(__vec_splatsf4(__LOG10F_neglogae),y,(vector float)zeros)) + ); + + return vec_add(zz1, zz2); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/log1pf4.c b/Extras/simdmathlibrary/ppu/simdmath/log1pf4.h similarity index 66% rename from Extras/simdmathlibrary/ppu/log1pf4.c rename to Extras/simdmathlibrary/ppu/simdmath/log1pf4.h index 3ac1971ea..16a9de46e 100644 --- a/Extras/simdmathlibrary/ppu/log1pf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/log1pf4.h @@ -27,28 +27,35 @@ POSSIBILITY OF SUCH DAMAGE.
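The log1pf4 hunk that follows splits on |x| < 0.5: near zero it evaluates a rational approximation x*n(x)/d(x) directly in x, avoiding the cancellation that makes logf(1.0f + x) lose most of x's low bits when x is tiny; outside that range it simply calls _logf4(x + 1). In scalar terms (illustrative only): log1p(x) ~ (fabsf(x) < 0.5f) ? x * n(x) / d(x) : logf(1.0f + x), where n and d are built from the polynomial constants in the hunk.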
*/ +#ifndef ___SIMD_MATH_LOG1PF4_H___ +#define ___SIMD_MATH_LOG1PF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/logf4.h> +#include <simdmath/divf4.h> -vector float -log1pf4 (vector float x) +static inline vector float +_log1pf4 (vector float x) { - vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(-0.5f)), - vec_cmpgt(vec_splatsf4(0.5f), x)); - vec_float4 x2 = vec_madd(x,x,vec_splatsf4(0.0f)); - vec_float4 d0, d1, n0, n1; + vector unsigned int nearzeromask = + (vector unsigned int)vec_and(vec_cmpgt(x, __vec_splatsf4(-0.5f)), + vec_cmpgt(__vec_splatsf4(0.5f), x)); + vector float x2 = vec_madd(x,x,__vec_splatsf4(0.0f)); + vector float d0, d1, n0, n1; - d0 = vec_madd(x , vec_splatsf4(1.5934420741f), vec_splatsf4(0.8952856868f)); - d1 = vec_madd(x , vec_splatsf4(0.1198195734f), vec_splatsf4(0.8377145063f)); + d0 = vec_madd(x , __vec_splatsf4(1.5934420741f), __vec_splatsf4(0.8952856868f)); + d1 = vec_madd(x , __vec_splatsf4(0.1198195734f), __vec_splatsf4(0.8377145063f)); d1 = vec_madd(x2, d1, d0); - n0 = vec_madd(x , vec_splatsf4(1.1457993413f), vec_splatsf4(0.8952856678f)); - n1 = vec_madd(x , vec_splatsf4(0.0082862580f), vec_splatsf4(0.3394238808f)); + n0 = vec_madd(x , __vec_splatsf4(1.1457993413f), __vec_splatsf4(0.8952856678f)); + n1 = vec_madd(x , __vec_splatsf4(0.0082862580f), __vec_splatsf4(0.3394238808f)); n1 = vec_madd(x2, n1, n0); - return vec_sel(logf4(vec_add(x, vec_splatsf4(1.0f))), - vec_madd(x, divf4(n1, d1), vec_splatsf4(0.0f)), + return vec_sel(_logf4(vec_add(x, __vec_splatsf4(1.0f))), + vec_madd(x, _divf4(n1, d1), __vec_splatsf4(0.0f)), nearzeromask); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/log2f4.c b/Extras/simdmathlibrary/ppu/simdmath/log2f4.h similarity index 52% rename from Extras/simdmathlibrary/ppu/log2f4.c rename to Extras/simdmathlibrary/ppu/simdmath/log2f4.h index 1d065ff4d..a8baca1a1 100644 --- a/Extras/simdmathlibrary/ppu/log2f4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/log2f4.h @@ -27,54 +27,51 @@ POSSIBILITY OF SUCH DAMAGE.
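The log2f4 hunk that follows uses the classic atanh-style kernel: write x = m * 2^e with m in [sqrt(1/2), sqrt(2)), put z = (m - 1)/(m + 1); then ln(m) = 2*atanh(z) = 2z*(1 + z^2/3 + z^4/5 + ...), and the c0..c2 polynomial in w = z^2 is a minimax fit of that series. The msb/lsb constant pairs split log2(e) into high and low parts so the exponent and mantissa contributions combine with less rounding error. A scalar sketch of the kernel, assuming C99; the name and the truncated series are illustrative only:

    float log2_kernel(float m, int e)      /* m in [sqrt(0.5), sqrt(2)) */
    {
        float z = (m - 1.0f) / (m + 1.0f);
        float w = z * z;
        float lnm = 2.0f * z * (1.0f + w / 3.0f + w * w / 5.0f);
        return (float)e + 1.4426950408890f * lnm;   /* e + log2(e) * ln(m) */
    }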
*/ +#ifndef ___SIMD_MATH_LOG2F4_H___ +#define ___SIMD_MATH_LOG2F4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/divf4.h> -#define _LOG2F_H_l2emsb ((float) 1.4426950216293f) -#define _LOG2F_H_l2elsb ((float) 1.9259629911e-8f) -#define _LOG2F_H_negl2e ((float)-1.4426950408890f) +#define __LOG2F_l2emsb 1.4426950216293f +#define __LOG2F_l2elsb 1.9259629911e-8f +#define __LOG2F_negl2e -1.4426950408890f -#define _LOG2F_H_c0 ((float)(-0.2988439998f)) -#define _LOG2F_H_c1 ((float)(-0.3997655209f)) -#define _LOG2F_H_c2 ((float)(-0.6666679125f)) +#define __LOG2F_c0 -0.2988439998f +#define __LOG2F_c1 -0.3997655209f +#define __LOG2F_c2 -0.6666679125f -vector float -log2f4 (vector float x) +static inline vector float +_log2f4 (vector float x) { - vec_int4 zeros = vec_splatsi4(0); - vec_float4 ones = vec_splatsf4(1.0f); - //vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros); + vector signed int zeros = __vec_splatsi4(0); + vector float ones = __vec_splatsf4(1.0f); - vec_int4 expmask = vec_splatsi4(0x7F800000); - vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); - x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + vector signed int expmask = __vec_splatsi4(0x7F800000); + vector signed int xexp = + vec_add( vec_sr(vec_and((vector signed int)x, expmask), __vec_splatsu4(23)), __vec_splatsi4(-126) ); + x = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask); - vec_uint4 mask = (vec_uint4)vec_cmpgt( vec_splatsf4((float)0.7071067811865f), x); + vector unsigned int mask = (vector unsigned int)vec_cmpgt( __vec_splatsf4(0.7071067811865f), x); x = vec_sel(x , vec_add(x, x) , mask); - xexp = vec_sel(xexp, vec_sub(xexp, vec_splatsi4(1) ), mask); + xexp = vec_sel(xexp, vec_sub(xexp, __vec_splatsi4(1) ), mask); - vec_float4 x1 = vec_sub(x , ones); - vec_float4 z = divf4(x1, vec_add(x, ones)); - vec_float4 w = vec_madd(z , z, (vec_float4)zeros); - vec_float4 polywneg; - polywneg = vec_madd(vec_splatsf4(_LOG2F_H_c0), w, vec_splatsf4(_LOG2F_H_c1)); - polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOG2F_H_c2)); + vector float x1 = vec_sub(x , ones); + vector float z = _divf4(x1, vec_add(x, ones)); + vector float w = vec_madd(z , z, (vector float)zeros); + vector float polywneg; + polywneg = vec_madd(__vec_splatsf4(__LOG2F_c0), w, __vec_splatsf4(__LOG2F_c1)); + polywneg = vec_madd(polywneg , w, __vec_splatsf4(__LOG2F_c2)); - vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); - vec_float4 zz1 = vec_madd(vec_splatsf4(_LOG2F_H_l2emsb), x1, vec_ctf(xexp,0)); - vec_float4 zz2 = vec_madd(vec_splatsf4(_LOG2F_H_l2elsb), x1, - vec_madd(vec_splatsf4(_LOG2F_H_negl2e), y, (vec_float4)zeros) - ); + vector float y = vec_madd(z, vec_madd(polywneg, w, x1), (vector float)zeros); + vector float zz1 = vec_madd(__vec_splatsf4(__LOG2F_l2emsb), x1, vec_ctf(xexp,0)); + vector float zz2 = vec_madd(__vec_splatsf4(__LOG2F_l2elsb), x1, + vec_madd(__vec_splatsf4(__LOG2F_negl2e), y, (vector float)zeros)); - //return vec_sel(vec_add(zz1,zz2), (vec_float4)zeromask, zeromask); return vec_add(zz1,zz2); } -/* -{ - return ((vec_float4)(0.0f)); -} -*/ +#endif diff --git a/Extras/simdmathlibrary/ppu/logbf4.c b/Extras/simdmathlibrary/ppu/simdmath/logbf4.h similarity index 78% rename from Extras/simdmathlibrary/ppu/logbf4.c rename to Extras/simdmathlibrary/ppu/simdmath/logbf4.h index fac06165e..b6f43d753 100644 --- a/Extras/simdmathlibrary/ppu/logbf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/logbf4.h @@ 
-27,18 +27,22 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LOGBF4_H___ +#define ___SIMD_MATH_LOGBF4_H___ + #include <simdmath.h> #include <altivec.h> #include <math.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> -vector float -logbf4 (vector float x) +static inline vector float +_logbf4 (vector float x) { - vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000)); - vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0)); - e1 = vec_sub(e1, vec_splatsi4(0x3F800000)); - return vec_sel(vec_ctf(e1,23), vec_splatsf4(-HUGE_VALF), zeromask); + vector signed int e1 = vec_and((vector signed int)x, __vec_splatsi4(0x7F800000)); + vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(e1, __vec_splatsi4(0)); + e1 = vec_sub(e1, __vec_splatsi4(0x3F800000)); + return vec_sel(vec_ctf(e1,23), __vec_splatsf4(-HUGE_VALF), zeromask); } +#endif diff --git a/Extras/simdmathlibrary/ppu/logf4.c b/Extras/simdmathlibrary/ppu/simdmath/logf4.h similarity index 54% rename from Extras/simdmathlibrary/ppu/logf4.c rename to Extras/simdmathlibrary/ppu/simdmath/logf4.h index 8b915c6a8..c6d66d633 100644 --- a/Extras/simdmathlibrary/ppu/logf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/logf4.h @@ -27,47 +27,51 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LOGF4_H___ +#define ___SIMD_MATH_LOGF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/divf4.h> -#define _LOGF_H_ln2msb ((float)(0.6931470632553f)) -#define _LOGF_H_negln2lsb ((float)(-1.1730463525e-7f)) +#define __LOGF_ln2msb 0.6931470632553f +#define __LOGF_negln2lsb -1.1730463525e-7f -#define _LOGF_H_c0 ((float)(-0.2988439998f)) -#define _LOGF_H_c1 ((float)(-0.3997655209f)) -#define _LOGF_H_c2 ((float)(-0.6666679125f)) +#define __LOGF_c0 -0.2988439998f +#define __LOGF_c1 -0.3997655209f +#define __LOGF_c2 -0.6666679125f -vector float -logf4 (vector float x) +static inline vector float +_logf4 (vector float x) { - vec_int4 zeros = vec_splatsi4(0); - vec_float4 ones = vec_splatsf4(1.0f); - //vec_uchar16 zeromask = (vec_uchar16)vec_cmpeq(x, (vec_float4)zeros); + vector signed int zeros = __vec_splatsi4(0); + vector float ones = __vec_splatsf4(1.0f); - vec_int4 expmask = vec_splatsi4(0x7F800000); - vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) ); - x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask); + vector signed int expmask = __vec_splatsi4(0x7F800000); + vector signed int xexp = + vec_add( vec_sr(vec_and((vector signed int)x, expmask), __vec_splatsu4(23)), __vec_splatsi4(-126) ); + x = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask); - vec_uint4 mask = (vec_uint4)vec_cmpgt(vec_splatsf4((float)0.7071067811865f), x); + vector unsigned int mask = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.7071067811865f), x); x = vec_sel(x , vec_add(x, x) , mask); - xexp = vec_sel(xexp, vec_sub(xexp,vec_splatsi4(1)), mask); + xexp = vec_sel(xexp, vec_sub(xexp,__vec_splatsi4(1)), mask); - vec_float4 x1 = vec_sub(x , ones); - vec_float4 z = divf4 (x1, vec_add(x, ones)); - vec_float4 w = vec_madd(z , z, (vec_float4)zeros); - vec_float4 polywneg; - polywneg = vec_madd(vec_splatsf4(_LOGF_H_c0), w, vec_splatsf4(_LOGF_H_c1)); - polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOGF_H_c2)); + vector float x1 = vec_sub(x , ones); + vector float z = _divf4 (x1, vec_add(x, ones)); + vector float w = vec_madd(z , z, (vector float)zeros); + vector float polywneg; + polywneg = vec_madd(__vec_splatsf4(__LOGF_c0), w, __vec_splatsf4(__LOGF_c1)); + polywneg = vec_madd(polywneg , w, 
__vec_splatsf4(__LOGF_c2)); - vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros); - vec_float4 wnew = vec_ctf(xexp,0); - vec_float4 zz1 = vec_madd(vec_splatsf4(_LOGF_H_ln2msb) , wnew, x1); - vec_float4 zz2neg = vec_madd(vec_splatsf4(_LOGF_H_negln2lsb), wnew, y ); + vector float y = vec_madd(z, vec_madd(polywneg, w, x1), (vector float)zeros); + vector float wnew = vec_ctf(xexp,0); + vector float zz1 = vec_madd(__vec_splatsf4(__LOGF_ln2msb) , wnew, x1); + vector float zz2neg = vec_madd(__vec_splatsf4(__LOGF_negln2lsb), wnew, y ); - //return vec_sel(vec_sub(zz1,zz2neg), (vec_float4)zeromask, zeromask); return vec_sub(zz1,zz2neg); } + +#endif diff --git a/Extras/simdmathlibrary/ppu/modff4.c b/Extras/simdmathlibrary/ppu/simdmath/modff4.h similarity index 85% rename from Extras/simdmathlibrary/ppu/modff4.c rename to Extras/simdmathlibrary/ppu/simdmath/modff4.h index 54ff1ba72..3d6f7e786 100644 --- a/Extras/simdmathlibrary/ppu/modff4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/modff4.h @@ -27,21 +27,27 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_MODFF4_H___ +#define ___SIMD_MATH_MODFF4_H___ + #include <simdmath.h> #include <altivec.h> +#include <simdmath/truncf4.h> + // modff4 - for each of four float slots, compute fractional and integral parts. // Returns fractional part and stores integral part in *iptr. -vector float -modff4 (vector float x, vector float *iptr) +static inline vector float +_modff4 (vector float x, vector float *iptr) { - vector float integral, fraction; + vector float integral, fraction; - integral = truncf4( x ); - fraction = vec_sub( x, integral ); + integral = _truncf4( x ); + fraction = vec_sub( x, integral ); - *iptr = integral; - return fraction; + *iptr = integral; + return fraction; } +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/negatef4.h b/Extras/simdmathlibrary/ppu/simdmath/negatef4.h new file mode 100644 index 000000000..292b6ab8f --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/negatef4.h @@ -0,0 +1,44 @@ +/* negatef4 - for each of four float slots, negate the value by flipping the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_NEGATEF4_H___ +#define ___SIMD_MATH_NEGATEF4_H___ + +#include <simdmath.h> +#include <altivec.h> + +#include <simdmath/_vec_utils.h> + +static inline vector float +_negatef4 (vector float x) +{ + return (vector float)vec_xor( (vector unsigned int)x, __vec_splatsu4(0x80000000) ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/negatei4.h b/Extras/simdmathlibrary/ppu/simdmath/negatei4.h new file mode 100644 index 000000000..57ddd6577 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/negatei4.h @@ -0,0 +1,45 @@ +/* negatei4 - for each of four int slots, negate the value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_NEGATEI4_H___ +#define ___SIMD_MATH_NEGATEI4_H___ + +#include <simdmath.h> +#include <altivec.h> + +#include <simdmath/_vec_utils.h> + +static inline vector signed int +_negatei4 (vector signed int x) +{ + vector signed int zero = __vec_splatsi4(0); + return vec_sub (zero, x); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/powf4.h b/Extras/simdmathlibrary/ppu/simdmath/powf4.h new file mode 100644 index 000000000..d7fcbfdd4 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/powf4.h @@ -0,0 +1,61 @@ +/* powf4 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_POWF4_H___ +#define ___SIMD_MATH_POWF4_H___ + +#include <simdmath.h> +#include <altivec.h> +#include <math.h> + +#include <simdmath/exp2f4.h> +#include <simdmath/log2f4.h> + +static inline vector float +_powf4 (vector float x, vector float y) +{ + vector signed int zeros = __vec_splatsi4(0); + vector unsigned int zeromask = (vector unsigned int)vec_cmpeq((vector float)zeros, x); + + vector unsigned int negmask = (vector unsigned int)vec_cmpgt((vector float)zeros, x); + + vector float sbit = (vector float)(__vec_splatsi4(0x80000000)); + vector float absx = vec_andc(x, sbit); + vector float absy = vec_andc(y, sbit); + vector unsigned int oddy = vec_and(vec_ctu(absy, 0), __vec_splatsu4(0x00000001)); + negmask = vec_and(negmask, (vector unsigned int)vec_cmpgt(oddy, (vector unsigned int)zeros)); + + vector float res = _exp2f4(vec_madd(y, _log2f4(absx), (vector float)zeros)); + res = vec_sel(res, vec_or(sbit, res), negmask); + + + return vec_sel(res, (vector float)zeros, zeromask); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/recipf4.c b/Extras/simdmathlibrary/ppu/simdmath/recipf4.h similarity index 83% rename from Extras/simdmathlibrary/ppu/recipf4.c rename to Extras/simdmathlibrary/ppu/simdmath/recipf4.h index 38f0f1550..a570986e0 100644 --- a/Extras/simdmathlibrary/ppu/recipf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/recipf4.h @@ -27,20 +27,24 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_RECIPF4_H___ +#define ___SIMD_MATH_RECIPF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> -vector float -recipf4 (vector float x) +static inline vector float +_recipf4 (vector float x) { - // Reciprocal estimate and 1 Newton-Raphson iteration. + // Reciprocal estimate and 1 Newton-Raphson iteration.
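+    // One Newton-Raphson step refines an estimate y0 ~ 1/x through the
+    // residual e = 1 - x*y0:  y1 = y0 + y0*e, which roughly doubles the
+    // number of correct bits.  Scalar form (illustrative only):
+    //     return y0 + y0 * (1.0f - x * y0);
+    // Below, vec_nmsub(x, y0, ones) computes the residual and vec_madd
+    // folds it back into y0.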
- vec_float4 y0; - vec_float4 ones = ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f}); + vector float y0; + vector float ones = __vec_splatsf4(1.0f); - y0 = vec_re( x ); - return vec_madd( vec_nmsub( x, y0, ones), y0, y0 ); + y0 = vec_re( x ); + return vec_madd( vec_nmsub( x, y0, ones), y0, y0 ); } +#endif diff --git a/Extras/simdmathlibrary/ppu/rsqrtf4.c b/Extras/simdmathlibrary/ppu/simdmath/rsqrtf4.h similarity index 76% rename from Extras/simdmathlibrary/ppu/rsqrtf4.c rename to Extras/simdmathlibrary/ppu/simdmath/rsqrtf4.h index a78e52a1a..5fd34956f 100644 --- a/Extras/simdmathlibrary/ppu/rsqrtf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/rsqrtf4.h @@ -27,27 +27,31 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_RSQRTF4_H___ +#define ___SIMD_MATH_RSQRTF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "common-types.h" +#include <simdmath/_vec_utils.h> // rsqrtf4 - for each of four float slots, compute reciprocal square root. // Undefined if input < 0. -vector float -rsqrtf4 (vector float x) +static inline vector float +_rsqrtf4 (vector float x) { - // Reciprocal square root estimate and 1 Newton-Raphson iteration. + // Reciprocal square root estimate and 1 Newton-Raphson iteration. - vector float zero = vec_splatsf4(0.0f); - vector float half = vec_splatsf4(0.5f); - vector float one = vec_splatsf4(1.0f); - vector float y0, y0x, y0half; + vector float zero = __vec_splatsf4(0.0f); + vector float half = __vec_splatsf4(0.5f); + vector float one = __vec_splatsf4(1.0f); + vector float y0, y0x, y0half; - y0 = vec_rsqrte( x ); - y0x = vec_madd( y0, x, zero ); - y0half = vec_madd( y0, half, zero ); - return vec_madd( vec_nmsub( y0, y0x, one ), y0half, y0 ); + y0 = vec_rsqrte( x ); + y0x = vec_madd( y0, x, zero ); + y0half = vec_madd( y0, half, zero ); + return vec_madd( vec_nmsub( y0, y0x, one ), y0half, y0 ); } +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/sincosf4.h b/Extras/simdmathlibrary/ppu/simdmath/sincosf4.h new file mode 100644 index 000000000..0a0bc3e74 --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/sincosf4.h @@ -0,0 +1,113 @@ +/* sincosf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_SINCOSF4_H___ +#define ___SIMD_MATH_SINCOSF4_H___ + +#include <simdmath.h> +#include <altivec.h> + +#include <simdmath/_sincos.h> +#include <simdmath/_vec_utils.h> + +// +// Computes both the sine and cosine of all four slots of x +// by using a polynomial approximation. +// +static inline void +_sincosf4 (vector float x, vector float *s, vector float *c) +{ + vector float xl,xl2,xl3; + vector signed int q; + vector signed int offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, __vec_splatsf4(0.63661977236f),__vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl,vec_sel(__vec_splatsf4(0.5f),xl,__vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + // + offsetSin = vec_and(q,__vec_splatsi4((int)0x3)); + offsetCos = vec_add(__vec_splatsi4(1),offsetSin); + + // Remainder in range [-pi/4..pi/4] + // + vector float qf = vec_ctf(q,0); + vector float p1 = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC1),x); + xl = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,__vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,__vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vector float ct1 = vec_madd(__vec_splatsf4(__SINCOSF_CC0),xl2,__vec_splatsf4(__SINCOSF_CC1)); + vector float st1 = vec_madd(__vec_splatsf4(__SINCOSF_SC0),xl2,__vec_splatsf4(__SINCOSF_SC1)); + + vector float ct2 = vec_madd(ct1,xl2,__vec_splatsf4(__SINCOSF_CC2)); + vector float st2 = vec_madd(st1,xl2,__vec_splatsf4(__SINCOSF_SC2)); + + vector float cx = vec_madd(ct2,xl2,__vec_splatsf4(1.0f)); + vector float sx = vec_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vector unsigned int sinMask = + (vector unsigned int)vec_cmpeq(vec_and(offsetSin,__vec_splatsi4(0x1)),__vec_splatsi4(0)); + vector unsigned int cosMask = + (vector unsigned int)vec_cmpeq(vec_and(offsetCos,__vec_splatsi4(0x1)),__vec_splatsi4(0)); + *s = vec_sel(cx,sx,sinMask); + *c = vec_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + sinMask = (vector unsigned int)vec_cmpeq(vec_and(offsetSin,__vec_splatsi4(0x2)),__vec_splatsi4(0)); + cosMask = (vector unsigned int)vec_cmpeq(vec_and(offsetCos,__vec_splatsi4(0x2)),__vec_splatsi4(0)); + + *s = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000),(vector unsigned int)*s),*s,sinMask); + *c = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000),(vector unsigned int)*c),*c,cosMask); + +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/sinf4.h b/Extras/simdmathlibrary/ppu/simdmath/sinf4.h new file mode 100644 index 000000000..12e3ec412 --- /dev/null +++ 
b/Extras/simdmathlibrary/ppu/simdmath/sinf4.h @@ -0,0 +1,107 @@ +/* sinf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_SINF4_H___ +#define ___SIMD_MATH_SINF4_H___ + +#include <simdmath.h> +#include <altivec.h> + +#include <simdmath/_sincos.h> +#include <simdmath/_vec_utils.h> +// +// Computes the sine of each of the four slots +// by using a polynomial approximation.
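+// As in sincosf4 above, the angle is first rewritten as x = q*(pi/2) + xl
+// with xl in [-pi/4..pi/4]; sin(x) is then +/-sin(xl) for even q and
+// +/-cos(xl) for odd q, so only the small-argument polynomials are needed.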
+// + +static inline vector float +_sinf4 (vector float x) +{ + vector float xl,xl2,xl3,res; + vector signed int q; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, __vec_splatsf4(0.63661977236f),__vec_splatsf4(0.0f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + xl = vec_add(xl,vec_sel(__vec_splatsf4(0.5f),xl,__vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); + + + // Compute an offset based on the quadrant that the angle falls in + // + vector signed int offset = vec_and(q,__vec_splatsi4((int)0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vector float qf = vec_ctf(q,0); + vector float p1 = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC1),x); + xl = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC2),p1); + + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,__vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,__vec_splatsf4(0.0f)); + + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vector float ct1 = vec_madd(__vec_splatsf4(__SINCOSF_CC0),xl2,__vec_splatsf4(__SINCOSF_CC1)); + vector float st1 = vec_madd(__vec_splatsf4(__SINCOSF_SC0),xl2,__vec_splatsf4(__SINCOSF_SC1)); + + vector float ct2 = vec_madd(ct1,xl2,__vec_splatsf4(__SINCOSF_CC2)); + vector float st2 = vec_madd(st1,xl2,__vec_splatsf4(__SINCOSF_SC2)); + + vector float cx = vec_madd(ct2,xl2,__vec_splatsf4(1.0f)); + vector float sx = vec_madd(st2,xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vector unsigned int mask1 = (vector unsigned int)vec_cmpeq(vec_and(offset, + __vec_splatsi4(0x1)), + __vec_splatsi4((int)(0))); + res = vec_sel(cx,sx,mask1); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + vector unsigned int mask2 = + (vector unsigned int)vec_cmpeq(vec_and(offset,__vec_splatsi4(0x2)),__vec_splatsi4((int)0)); + res = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000U),(vector unsigned int)res),res,mask2); + + return res; +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/sqrtf4.c b/Extras/simdmathlibrary/ppu/simdmath/sqrtf4.h similarity index 72% rename from Extras/simdmathlibrary/ppu/sqrtf4.c rename to Extras/simdmathlibrary/ppu/simdmath/sqrtf4.h index e40b4c841..3b4a0d17c 100644 --- a/Extras/simdmathlibrary/ppu/sqrtf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/sqrtf4.h @@ -27,27 +27,33 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_SQRTF4_H___ +#define ___SIMD_MATH_SQRTF4_H___ + #include <simdmath.h> #include <altivec.h> +#include <simdmath/_vec_utils.h> + // sqrtf4 - for each of four float slots, compute square root. // Undefined if input < 0. -vector float -sqrtf4 (vector float x) +static inline vector float +_sqrtf4 (vector float x) { - // Reciprocal square root estimate and 1 Newton-Raphson iteration. + // Reciprocal square root estimate and 1 Newton-Raphson iteration.
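+    // With y0 ~ 1/sqrt(x) from the estimate, y0x = y0*x approximates sqrt(x)
+    // and one Newton-Raphson step refines it:
+    //     sqrt(x) ~ y0x + (1 - y0*y0x) * (y0x/2)
+    // Scalar form (illustrative only):
+    //     float y0x = y0 * x;
+    //     return y0x + (1.0f - y0 * y0x) * (0.5f * y0x);
+    // The vec_cmpeq/vec_sel pair below forces the x == 0 lanes to exactly 0,
+    // since vec_rsqrte(0) is infinite and 0 * infinity would yield NaN.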
- vector float zero = (vector float){0.0f, 0.0f, 0.0f, 0.0f}; - vector float half = (vector float){0.5f, 0.5f, 0.5f, 0.5f}; - vector float one = (vector float){1.0f, 1.0f, 1.0f, 1.0f}; - vector float y0, y0x, y0xhalf; - vector unsigned int cmp_zero; + vector float zero = __vec_splatsf4(0.0f); + vector float half = __vec_splatsf4(0.5f); + vector float one = __vec_splatsf4(1.0f); + vector float y0, y0x, y0xhalf; + vector unsigned int cmp_zero; - y0 = vec_rsqrte( x ); - cmp_zero = (vector unsigned int)vec_cmpeq( x, zero ); - y0x = vec_madd( y0, x, zero ); - y0xhalf = vec_madd( y0x, half, zero ); - return vec_sel( vec_madd( vec_nmsub( y0, y0x, one ), y0xhalf, y0x ), zero, cmp_zero ); + y0 = vec_rsqrte( x ); + cmp_zero = (vector unsigned int)vec_cmpeq( x, zero ); + y0x = vec_madd( y0, x, zero ); + y0xhalf = vec_madd( y0x, half, zero ); + return vec_sel( vec_madd( vec_nmsub( y0, y0x, one ), y0xhalf, y0x ), zero, cmp_zero ); } +#endif diff --git a/Extras/simdmathlibrary/ppu/tanf4.c b/Extras/simdmathlibrary/ppu/simdmath/tanf4.h similarity index 51% rename from Extras/simdmathlibrary/ppu/tanf4.c rename to Extras/simdmathlibrary/ppu/simdmath/tanf4.h index 8f3f6de09..cdb692a00 100644 --- a/Extras/simdmathlibrary/ppu/tanf4.c +++ b/Extras/simdmathlibrary/ppu/simdmath/tanf4.h @@ -27,70 +27,72 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_TANF4_H___ +#define ___SIMD_MATH_TANF4_H___ + #include <simdmath.h> #include <altivec.h> -#include "sincos_c.h" - -#include "common-types.h" - -#define _TAN_KC1 1.57079625129f -#define _TAN_KC2 7.54978995489e-8f +#include <simdmath/_sincos.h> +#include <simdmath/divf4.h> // // Computes the tangent of all four slots of x by using a polynomial approximation. // -vector float -tanf4 (vector float x) +static inline vector float +_tanf4 (vector float x) { - vector float xl,xl2,xl3,res; - vector signed int q; + vector float xl,xl2,xl3,res; + vector signed int q; - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_madd(x, __vec_splatsf4(0.63661977236f),__vec_splatsf4(0.0f)); - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(x))*sign(x)) - // - xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); - q = vec_cts(xl,0); + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(x))*sign(x)) + // + xl = vec_add(xl,vec_sel(__vec_splatsf4(0.5f),xl,__vec_splatsu4(0x80000000))); + q = vec_cts(xl,0); - // Remainder in range [-pi/4..pi/4] - // - vector float qf = vec_ctf(q,0); - vector float p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); - xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); + // Remainder in range [-pi/4..pi/4] + // + vector float qf = vec_ctf(q,0); + vector float p1 = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC1),x); + xl = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC2),p1); - // Compute x^2 and x^3 - // - xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); - xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); + // Compute x^2 and x^3 + // + xl2 = vec_madd(xl,xl,__vec_splatsf4(0.0f)); + xl3 = vec_madd(xl2,xl,__vec_splatsf4(0.0f)); - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + x2 * (C0 * x2 + C1), and - // sx = xl + x3 * S0 - // - vector float ct2 = vec_madd(vec_splatsf4( 0.0097099364f),xl2,vec_splatsf4(-0.4291161787f)); + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + x2 * (C0 * x2 + C1), and + // sx = xl + x3 * S0 + // + vector float ct2 = 
vec_madd(__vec_splatsf4( 0.0097099364f),xl2,__vec_splatsf4(-0.4291161787f)); - vector float cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); - vector float sx = vec_madd(vec_splatsf4(-0.0957822992f),xl3,xl); + vector float cx = vec_madd(ct2,xl2,__vec_splatsf4(1.0f)); + vector float sx = vec_madd(__vec_splatsf4(-0.0957822992f),xl3,xl); - // Compute both cx/sx and sx/cx - // - vector float cxosx = divf4(cx,sx); - vector float sxocx = divf4(sx,cx); + // Compute both cx/sx and sx/cx + // + vector float cxosx = _divf4(cx,sx); + vector float sxocx = _divf4(sx,cx); - vector float ncxosx = (vector float)vec_xor(vec_splatsu4(0x80000000),(vector unsigned int)cxosx); + vector float ncxosx = (vector float)vec_xor(__vec_splatsu4(0x80000000),(vector unsigned int)cxosx); - // For odd numbered quadrants return -cx/sx , otherwise return - // sx/cx - // - vector unsigned int mask = (vector unsigned int)vec_cmpeq(vec_and(q,vec_splatsi4(0x1)),vec_splatsi4(0)); - res = vec_sel(ncxosx,sxocx,mask); + // For odd numbered quadrants return -cx/sx , otherwise return + // sx/cx + // + vector unsigned int mask = + (vector unsigned int)vec_cmpeq(vec_and(q,__vec_splatsi4(0x1)),__vec_splatsi4(0)); + res = vec_sel(ncxosx,sxocx,mask); - return res; + return res; } + +#endif diff --git a/Extras/simdmathlibrary/ppu/simdmath/truncf4.h b/Extras/simdmathlibrary/ppu/simdmath/truncf4.h new file mode 100644 index 000000000..ab0edad9e --- /dev/null +++ b/Extras/simdmathlibrary/ppu/simdmath/truncf4.h @@ -0,0 +1,43 @@ +/* truncf4 - for each of four float slots, round towards zero to integer value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___SIMD_MATH_TRUNCF4_H___ +#define ___SIMD_MATH_TRUNCF4_H___ + +#include <simdmath.h> +#include <altivec.h> + + +static inline vector float +_truncf4 (vector float x) +{ + return vec_trunc( x ); +} + +#endif diff --git a/Extras/simdmathlibrary/ppu/sincos_c.h b/Extras/simdmathlibrary/ppu/sincos_c.h deleted file mode 100644 index ab19b0eb0..000000000 --- a/Extras/simdmathlibrary/ppu/sincos_c.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Common constants for Sin/Cos/Tan - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.
- */ - -#ifndef __SINCOS_C2__ -#define __SINCOS_C2__ - -// -// Common constants used to evaluate sind2/cosd2/tand2 -// -#define _SINCOS_CC0D 0.00000000206374484196 -#define _SINCOS_CC1D -0.00000027555365134677 -#define _SINCOS_CC2D 0.00002480157946764225 -#define _SINCOS_CC3D -0.00138888888730525966 -#define _SINCOS_CC4D 0.04166666666651986722 -#define _SINCOS_CC5D -0.49999999999999547304 - -#define _SINCOS_SC0D 0.00000000015893606014 -#define _SINCOS_SC1D -0.00000002505069049138 -#define _SINCOS_SC2D 0.00000275573131527032 -#define _SINCOS_SC3D -0.00019841269827816117 -#define _SINCOS_SC4D 0.00833333333331908278 -#define _SINCOS_SC5D -0.16666666666666612594 - -#define _SINCOS_KC1D (13176794.0 / 8388608.0) -#define _SINCOS_KC2D 7.5497899548918821691639751442098584e-8 - - -// -// Common constants used to evaluate sinf4/cosf4/tanf4 -// -#define _SINCOS_CC0 -0.0013602249f -#define _SINCOS_CC1 0.0416566950f -#define _SINCOS_CC2 -0.4999990225f -#define _SINCOS_SC0 -0.0001950727f -#define _SINCOS_SC1 0.0083320758f -#define _SINCOS_SC2 -0.1666665247f - -#define _SINCOS_KC1 1.57079625129f -#define _SINCOS_KC2 7.54978995489e-8f - -// -// Common constants used to evaluate sinf4est/cosf4est -// -#define _SINCOS_R1 -0.1666665668f -#define _SINCOS_R2 0.8333025139e-2f -#define _SINCOS_R3 -0.1980741872e-3f -#define _SINCOS_R4 0.2601903036e-5f - -#define _SINCOS_C1 (201.0f/64.0f) -#define _SINCOS_C2 9.67653589793e-4f - - -// common constants used to evaluate sinf/cosf - -#define _SIN_C1 -0.35950439e-4f -#define _SIN_C2 0.2490001007e-2f -#define _SIN_C3 -0.8074543253e-1f -#define _SIN_C4 0.7853981633f - -#define _COS_C1 -0.31872783e-3f -#define _COS_C2 0.1584968416e-1f -#define _COS_C3 -0.30842416558f -#define _COS_C4 0.9999999673f - -#define POW2(x) x*x -#define SPOLY(x) (((_SIN_C1 * POW2(x) + _SIN_C2) * POW2(x) + (_SIN_C3)) * POW2(x) + _SIN_C4) * x -#define CPOLY(x) (((_COS_C1 * POW2(x) + _COS_C2) * POW2(x) + (_COS_C3)) * POW2(x) + _COS_C4) - -#define M_PI 3.141592653589793f -#endif diff --git a/Extras/simdmathlibrary/ppu/sincosf4.c b/Extras/simdmathlibrary/ppu/sincosf4.c deleted file mode 100644 index 19efeed30..000000000 --- a/Extras/simdmathlibrary/ppu/sincosf4.c +++ /dev/null @@ -1,106 +0,0 @@ -/* sincosf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <altivec.h> - -#include "sincos_c.h" -#include "common-types.h" - -// -// Computes both the sine and cosine of the all four slots of x -// by using a polynomial approximation. -// -void sincosf4 (vector float x, vector float *s, vector float *c) -{ - vec_float4 xl,xl2,xl3; - vec_int4 q; - vec_int4 offsetSin, offsetCos; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); - q = vec_cts(xl,0); - - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - // - offsetSin = vec_and(q,vec_splatsi4((int)0x3)); - offsetCos = vec_add(vec_splatsi4(1),offsetSin); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = vec_ctf(q,0); - vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); - xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); - - // Compute x^2 and x^3 - // - xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); - xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); - - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); - vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); - - vec_float4 ct2 = vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); - vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); - - vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); - vec_float4 sx = vec_madd(st2,xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,vec_splatsi4(0x1)),vec_splatsi4(0)); - vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,vec_splatsi4(0x1)),vec_splatsi4(0)); - *s = vec_sel(cx,sx,sinMask); - *c = vec_sel(cx,sx,cosMask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,vec_splatsi4(0x2)),vec_splatsi4(0)); - cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,vec_splatsi4(0x2)),vec_splatsi4(0)); - - *s = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000),(vec_uint4)*s),*s,sinMask); - *c = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000),(vec_uint4)*c),*c,cosMask); - -} - diff --git a/Extras/simdmathlibrary/ppu/sinf4.c b/Extras/simdmathlibrary/ppu/sinf4.c deleted file mode 100644 index c382e85bd..000000000 --- a/Extras/simdmathlibrary/ppu/sinf4.c +++ /dev/null @@ -1,103 +0,0 @@ -/* sinf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved.
- - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "sincos_c.h" -#include "common-types.h" -// -// Computes the sine of each of the four slots -// by using a polynomial approximation. -// - -vector float -sinf4 (vector float x) -{ - vec_float4 xl,xl2,xl3,res; - vec_int4 q; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000))); - q = vec_cts(xl,0); - - - // Compute an offset based on the quadrant that the angle falls in - // - vec_int4 offset = vec_and(q,vec_splatsi4((int)0x3)); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = vec_ctf(q,0); - vec_float4 p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x); - xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1); - - // Compute x^2 and x^3 - // - xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f)); - xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f)); - - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - vec_float4 ct1 = vec_madd(vec_splatsf4(_SINCOS_CC0),xl2,vec_splatsf4(_SINCOS_CC1)); - vec_float4 st1 = vec_madd(vec_splatsf4(_SINCOS_SC0),xl2,vec_splatsf4(_SINCOS_SC1)); - - vec_float4 ct2 = vec_madd(ct1,xl2,vec_splatsf4(_SINCOS_CC2)); - vec_float4 st2 = vec_madd(st1,xl2,vec_splatsf4(_SINCOS_SC2)); - - vec_float4 cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f)); - vec_float4 sx = vec_madd(st2,xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_uint4 mask1 = (vec_uint4)vec_cmpeq(vec_and(offset, - vec_splatsi4(0x1)), - vec_splatsi4((int)(0))); - res = vec_sel(cx,sx,mask1); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - vec_uint4 mask2 = (vec_uint4)vec_cmpeq(vec_and(offset,vec_splatsi4(0x2)),vec_splatsi4((int)0)); - res = 
vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000U),(vec_uint4)res),res,mask2); - - return res; - -} - diff --git a/Extras/simdmathlibrary/ppu/tests/Makefile b/Extras/simdmathlibrary/ppu/tests/Makefile index abdd85df3..918b90fb3 100644 --- a/Extras/simdmathlibrary/ppu/tests/Makefile +++ b/Extras/simdmathlibrary/ppu/tests/Makefile @@ -35,10 +35,10 @@ STATIC_TESTS = $(TESTS) SHARED_TESTS = $(TESTS:=.shared) ALL_TESTS = $(STATIC_TESTS) $(SHARED_TESTS) -INCLUDES_PPU = -I../../ +INCLUDES_PPU = -I../../common -ARCH_PPU = 64 -CROSS_PPU = ppu- +ARCH_PPU = 32 +CROSS_PPU = AR_PPU = $(CROSS_PPU)ar CC_PPU = $(CROSS_PPU)gcc CXX_PPU = $(CROSS_PPU)g++ @@ -111,21 +111,8 @@ shared_check: ../$(SHARED_LIB): cd ../;$(MAKE) $(MAKE_DEFS) $(SHARED_LIB) -%.o: %.c common-test.h testutils.h +%.o: %.c ../../common/common-test.h testutils.h $(CC_PPU) $(CFLAGS_PPU) -c $< -#---------- -# C++ -#---------- -%.o: %.C - $(CXX_PPU) $(CFLAGS_PPU) -c $< - -%.o: %.cpp - $(CXX_PPU) $(CFLAGS_PPU) -c $< - %.o: %.cc $(CXX_PPU) $(CFLAGS_PPU) -c $< - -%.o: %.cxx - $(CXX_PPU) $(CFLAGS_PPU) -c $< - diff --git a/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h b/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h deleted file mode 100644 index 0d6abd6f8..000000000 --- a/Extras/simdmathlibrary/ppu/tests/floatingpoint_tests.h +++ /dev/null @@ -1,189 +0,0 @@ -/* Header file for common parts of the testsuite - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
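One detail worth flagging in sinf4 above: the comment says the sign flips when (offset mod 4) = 1 or 2, but the mask actually tests bit 1 of the offset, i.e. offsets 2 and 3, which is what the identity sin(r + q*pi/2) requires. A quick scalar check of that bookkeeping (not from the source; assumes the POSIX math constants M_2_PI and M_PI_2):

#include <assert.h>
#include <math.h>

/* Worked example: x = 3.0 rad lands in quadrant q = 2, so the sine
   polynomial is used (q even) and the sign is flipped (q & 2 set). */
static void check_quadrant_logic(void)
{
    double x = 3.0;
    int    q = (int)floor(x * M_2_PI + 0.5);   /* round(x * 2/pi) = 2 */
    double r = x - q * M_PI_2;                 /* ~ -0.14159 */
    assert(fabs(sin(x) - (-sin(r))) < 1e-12);  /* sin(x) = -sin(r) */
}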
- */ - - -#ifndef _FLOATINGPOINT_TESTS_H_ -#define _FLOATINGPOINT_TESTS_H_ - -#if defined(__PPC__) - #include <altivec.h> - #define vec_uchar16 vector unsigned char - #define vec_char16 vector signed char - #define vec_ushort8 vector unsigned short - #define vec_short8 vector signed short - #define vec_uint4 vector unsigned int - #define vec_int4 vector signed int - #define vec_ullong2 vector unsigned long long - #define vec_llong2 vector signed long long - #define vec_float4 vector float - #define vec_double2 vector double -#else - #if __SPU__ - #include <spu_intrinsics.h> - #endif -#endif - -// To avoid type punning warnings (for printing in hex notation, doing bit-diff etc) -typedef union { - double d; - unsigned char uc[8]; - unsigned int ui[2]; - unsigned long long int ull; -} sce_math_alt_double; - -typedef union { - float f; - unsigned char uc[4]; - unsigned int ui; -} sce_math_alt_float; - -#if (__PPC__ || __SPU__) -typedef union { - vec_int4 vsi; - int si[4]; -} sce_math_alt_vec_int4; - -typedef union { - vec_uint4 vui; - int ui[4]; -} sce_math_alt_vec_uint4; - -typedef union { - vec_float4 vf; - float sf[4]; - unsigned int ui[4]; -} sce_math_alt_vec_float4; -#endif -#if __SPU__ - typedef union { - double sd[2]; - vec_double2 vd; - unsigned long long int ui[2]; - } sce_math_alt_vec_double2; -#endif - -#if __PPC__ - inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = vec_sub(refi, valsi); - vec_int4 negdiff = vec_sub(((vec_int4){0,0,0,0}), diff); - - return vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) )); - } - inline int bitdiff(float ref, float val) { - sce_math_alt_float aref, aval; - aref.f = ref; - aval.f = val; - int diff = aref.ui - aval.ui; - return (diff>0)?diff:-diff; - } - inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = vec_sub(refi, valsi); - vec_int4 negdiff = vec_sub(((vec_int4){0,0,0,0}), diff); - - diff = vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) )); - vec_float4 logdiff = vec_loge(vec_ctf(diff,0)); - return vec_sub(((vec_int4){32,32,32,32}), vec_cts(vec_ceil(logdiff),0)); - } - inline int bitmatch(float ref, float val) { - sce_math_alt_vec_float4 aref, aval; - sce_math_alt_vec_int4 adiff; - aref.sf[0] = ref; - aval.sf[0] = val; - adiff.vsi = bitmatch4(aref.vf, aval.vf); - return adiff.si[0]; - } - inline float extractFloat(vec_float4 vf, int index) - { - sce_math_alt_vec_float4 vec; - vec.vf = vf; - return vec.sf[index]; - } - inline int extractInt(vec_int4 vi, int index) - { - sce_math_alt_vec_int4 vec; - vec.vsi = vi; - return vec.si[index]; - } - inline int extractUInt(vec_uint4 vi, int index) - { - sce_math_alt_vec_uint4 vec; - vec.vui = vi; - return vec.ui[index]; - } -#else - #if __SPU__ - inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = spu_sub(refi, valsi); - vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); - - return spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)); - } - inline int bitdiff(float ref, float val) { - return spu_extract(bitdiff4(spu_promote(ref,0), spu_promote(val,0)), 0); - } - inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = spu_sub(refi, valsi); - vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); - - return 
(vec_int4)spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0))); - } - inline int bitmatch(float ref, float val) { - return spu_extract(bitmatch4(spu_promote(ref,0), spu_promote(val,0)), 0); - } - - #else - inline int bitdiff(sce_math_alt_float ref, sce_math_alt_float val) { - int diff = ref.ui - val.ui; - return((diff>0)?diff:-diff); - } - inline int bitmatch(sce_math_alt_float ref, sce_math_alt_float val) { - int diff, i; - unsigned int udiff; - diff = ref.ui - val.ui; - udiff = (diff>0) ? diff : -diff; - i = 32; - while(udiff != 0) { - i = i-1; - udiff = udiff >> 1; - } - return udiff; - } - #endif // __SPU__ -#endif // __PPC__ - - -#endif // _FLOATINGPOINT_TESTS_H_ diff --git a/Extras/simdmathlibrary/ppu/tests/testutils.h b/Extras/simdmathlibrary/ppu/tests/testutils.h index c7cf78b11..7b905edfd 100644 --- a/Extras/simdmathlibrary/ppu/tests/testutils.h +++ b/Extras/simdmathlibrary/ppu/tests/testutils.h @@ -32,7 +32,17 @@ #ifndef _TESTUTILS_H_ -#include "floatingpoint_tests.h" +#include <altivec.h> +#define vec_uchar16 vector unsigned char +#define vec_char16 vector signed char +#define vec_ushort8 vector unsigned short +#define vec_short8 vector signed short +#define vec_uint4 vector unsigned int +#define vec_int4 vector signed int +#define vec_ullong2 vector unsigned long long +#define vec_llong2 vector signed long long +#define vec_float4 vector float +#define vec_double2 vector double extern unsigned int hide_uint( unsigned int x ); extern int hide_int( int x ); diff --git a/Extras/simdmathlibrary/simdmath.spec b/Extras/simdmathlibrary/simdmath.spec index 948a17212..d8352c635 100644 --- a/Extras/simdmathlibrary/simdmath.spec +++ b/Extras/simdmathlibrary/simdmath.spec @@ -1,9 +1,8 @@ -%define major_version 1 -%define minor_version 0 +%define lib_version 1.0.1 Summary: SIMD math library. Name: simdmath -Version: %{major_version}.%{minor_version} +Version: %{lib_version} Release: 1 License: BSD Group: System Environment/Libraries @@ -32,7 +31,7 @@ Summary: SIMD math library. Group: Development/Libraries Requires: %{name} = %{version} -%package -n %{name}-spe-devel +%package -n spu-%{name}-devel Summary: SIMD math library. Group: Development/Libraries @@ -42,7 +41,7 @@ SIMD math library. %description -n %{name}-devel SIMD math library. -%description -n %{name}-spe-devel +%description -n spu-%{name}-devel SIMD math library. 
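The deleted floatingpoint_tests.h above scores accuracy by the distance between IEEE bit patterns (bitdiff, roughly a ULP count) and by the number of matching leading bits (bitmatch). A portable scalar sketch of the same idea (hypothetical names; note the deleted scalar fallback appears to return udiff, which is always 0 at loop exit, where the counter i was presumably intended):

#include <stdint.h>
#include <string.h>

/* Distance between two floats in representation space (~ULPs). */
static int32_t float_bitdiff(float ref, float val)
{
    uint32_t a, b;
    memcpy(&a, &ref, sizeof a);   /* well-defined type punning */
    memcpy(&b, &val, sizeof b);
    int32_t d = (int32_t)(a - b);
    return d > 0 ? d : -d;
}

/* Matching leading bits: 32 minus the bit-length of the difference. */
static int float_bitmatch(float ref, float val)
{
    uint32_t d = (uint32_t)float_bitdiff(ref, val);
    int n = 32;
    while (d != 0) { n--; d >>= 1; }
    return n;
}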
%prep @@ -74,7 +73,7 @@ rm -rf %{buildroot} %{_libdir}/*.a %{_libdir}/*.so -%files -n %{name}-spe-devel +%files -n spu-%{name}-devel %defattr(-,root,root,-) %{_spe_includedir}/* %{_spe_libdir}/* diff --git a/Extras/simdmathlibrary/spu/Makefile b/Extras/simdmathlibrary/spu/Makefile index c1b4d1813..56de8d2e2 100644 --- a/Extras/simdmathlibrary/spu/Makefile +++ b/Extras/simdmathlibrary/spu/Makefile @@ -30,11 +30,12 @@ # All that you do to add a file is edit OBJS, the rest will just work prefix = /usr +prefix_spu = $(prefix)/spu DESTDIR = OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \ is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \ - sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \ + sinf4.o isgreaterd2.o sind2.o sincosd2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \ rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \ negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \ cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \ @@ -51,7 +52,7 @@ OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o fmodd2.o remainderd2.o -INCLUDES_SPU = -I../ +INCLUDES_SPU = -I. -I../common CROSS_SPU = spu- AR_SPU = $(CROSS_SPU)ar @@ -66,6 +67,7 @@ INSTALL = install MAKE_DEFS = \ prefix='$(prefix)' \ + prefix_spu='$(prefix_spu)' \ DESTDIR='$(DESTDIR)' \ LIB_BASE='$(LIB_BASE)' \ LIB_NAME='$(LIB_NAME)' \ @@ -89,43 +91,28 @@ $(STATIC_LIB): $(OBJS) $(RANLIB_SPU) $@ install: $(STATIC_LIB) - $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include - $(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/ - $(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib - $(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB) + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include/simdmath + $(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_spu)/include/simdmath/ + $(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/lib + $(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix_spu)/lib/$(STATIC_LIB) clean: cd tests; $(MAKE) $(MAKE_DEFS) clean rm -f $(OBJS) rm -f $(STATIC_LIB) -$(OBJS): ../simdmath.h +$(OBJS): ../common/simdmath.h check: $(STATIC_LIB) cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check # Some Objects have special header files. -sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h -lldivu2.o lldivi2.o : lldiv.h +sinf4.o sind2.o sincosf4.o cosd2.o: ../common/simdmath/_sincos.h +lldivu2.o lldivi2.o : simdmath/_lldiv.h -%.o: %.c +%.o: ../common/%.c simdmath/%.h $(CC_SPU) $(CFLAGS_SPU) -c $< - -#---------- -# C++ -#---------- -%.o: %.C - $(CXX_SPU) $(CFLAGS_SPU) -c $< - -%.o: %.cpp - $(CXX_SPU) $(CFLAGS_SPU) -c $< - -%.o: %.cc - $(CXX_SPU) $(CFLAGS_SPU) -c $< - -%.o: %.cxx - $(CXX_SPU) $(CFLAGS_SPU) -c $< - diff --git a/Extras/simdmathlibrary/spu/cosd2.c b/Extras/simdmathlibrary/spu/cosd2.c deleted file mode 100644 index 8d1d964f6..000000000 --- a/Extras/simdmathlibrary/spu/cosd2.c +++ /dev/null @@ -1,127 +0,0 @@ -/* cosd2 - Computes the cosine of each of the two double slots. - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> - -#include "sincos_c.h" - -vector double -cosd2 (vector double x) -{ - vec_double2 xl,xl2,xl3,res; - vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull); - vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull); - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(x))*sign(x)) - // - xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull))); - vec_float4 xf = spu_roundtf(xl); - vec_int4 q = spu_convts(xf,0); - q = spu_shuffle(q,q,copyEven); - - - // Compute an offset based on the quadrant that the angle falls in - // - vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3))); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = spu_convtf(q,0); - vec_double2 qd = spu_extend(qf); - vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x); - xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1); - - // Check if |xl| is a really small number - // - vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull)); - vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl); - - // Compute x^2 and x^3 - // - xl2 = spu_mul(xl,xl); - xl3 = spu_mul(xl2,xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and - // sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5) - // - - vec_double2 ct0 = spu_mul(xl2,xl2); - vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D)); - vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D)); - vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D)); - vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D)); - vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D)); - vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D)); - vec_double2 ct4 = spu_madd(ct2,ct0,ct3); - vec_double2 st4 = spu_madd(st2,ct0,st3); - 
vec_double2 ct5 = spu_mul(ct0,ct0); - - vec_double2 ct6 = spu_madd(ct5,ct1,ct4); - vec_double2 st6 = spu_madd(ct5,st1,st4); - - vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0)); - vec_double2 sx = spu_madd(st6,xl3,xl); - - // Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0 - // - sx = spu_sel(sx,xl,isTiny); - cx = spu_sel(cx,spu_splats(1.0),isTiny); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); - res = spu_sel(cx,sx,mask1); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); - mask2 = spu_shuffle(mask2,mask2,copyEven); - res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2); - // if input = +/-Inf return NAN - // - res = spu_sel(res, nan, isnand2 (x)); - - // if input = 0 or denorm return 1.0 - // - vec_ullong2 zeroMask = is0denormd2 (x); - res = spu_sel(res,spu_splats(1.0),zeroMask); - return res; -} - - diff --git a/Extras/simdmathlibrary/spu/cosf4.c b/Extras/simdmathlibrary/spu/cosf4.c deleted file mode 100644 index 64a6594d7..000000000 --- a/Extras/simdmathlibrary/spu/cosf4.c +++ /dev/null @@ -1,94 +0,0 @@ -/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED.
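The double-precision reduction in cosd2 above works because _SINCOS_KC1D = 13176794/2^23 is pi/2 truncated to a 24-bit significand, so q * KC1D is exact in double for any reachable quadrant index, and _SINCOS_KC2D supplies the missing low-order digits. A small check of that split (a sketch; assumes the POSIX constant M_PI_2):

#include <assert.h>
#include <math.h>

static void check_kc1d_kc2d(void)
{
    double kc1 = 13176794.0 / 8388608.0;   /* pi/2 to 24 bits, exact */
    double kc2 = 7.5497899548918821691639751442098584e-8;
    assert(fabs((kc1 + kc2) - M_PI_2) < 1e-15);
    /* With |q| < 2^29, q*kc1 is exact, so (x - q*kc1) - q*kc2
       computes the reduced argument with almost no cancellation. */
}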
- */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include "sincos_c.h" - -vector float -cosf4 (vector float x) -{ - vec_float4 xl,xl2,xl3,res; - vec_int4 q; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = spu_mul(x, spu_splats(0.63661977236f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000))); - q = spu_convts(xl,0); - - - // Compute an offset based on the quadrant that the angle falls in - // - vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3))); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = spu_convtf(q,0); - vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x); - xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1); - - // Compute x^2 and x^3 - // - xl2 = spu_mul(xl,xl); - xl3 = spu_mul(xl2,xl); - - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1)); - vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1)); - - vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2)); - vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2)); - - vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f)); - vec_float4 sx = spu_madd(st2,xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0)); - res = spu_sel(cx,sx,mask1); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0)); - res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2); - - return res; - -} diff --git a/Extras/simdmathlibrary/spu/divi4.c b/Extras/simdmathlibrary/spu/divi4.c deleted file mode 100644 index aa03d585b..000000000 --- a/Extras/simdmathlibrary/spu/divi4.c +++ /dev/null @@ -1,109 +0,0 @@ -/* divi4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> - -// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom -// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator. - -divi4_t divi4 (vector signed int numer, vector signed int denom) -{ - divi4_t res; - vec_int4 quot, newQuot, shift; - vec_uint4 numerPos, denomPos, quotNeg; - vec_uint4 numerAbs, denomAbs; - vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted; - vec_uint4 newNum, skip, cont; - int anyCont; - - // Determine whether result needs sign change - - numerPos = spu_cmpgt( numer, -1 ); - denomPos = spu_cmpgt( denom, -1 ); - quotNeg = spu_xor( numerPos, denomPos ); - - // Use absolute values of numerator, denominator - - numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos ); - denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos ); - - // Get difference of leading zeros. - // Any possible negative value will be interpreted as a shift > 31 - - denomZeros = spu_cntlz( denomAbs ); - numerZeros = spu_cntlz( numerAbs ); - - shift = (vec_int4)spu_sub( denomZeros, numerZeros ); - - // Shift denom to align leading one with numerator's - - denomShifted = spu_sl( denomAbs, (vec_uint4)shift ); - oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift ); - oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) ); - - // Shift left all leading zeros. - - denomLeft = spu_sl( denomAbs, denomZeros ); - oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros ); - - quot = spu_splats(0); - - do - { - cont = spu_cmpgt( oneShifted, 0U ); - anyCont = spu_extract( spu_gather( cont ), 0 ); - - newQuot = spu_or( quot, (vec_int4)oneShifted ); - - // Subtract shifted denominator from remaining numerator - // when denominator is not greater. - - skip = spu_cmpgt( denomShifted, numerAbs ); - newNum = spu_sub( numerAbs, denomShifted ); - - // If denominator is greater, next shift is one more, otherwise - // next shift is number of leading zeros of remaining numerator. - - numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip ); - shift = (vec_int4)spu_sub( skip, numerZeros ); - - oneShifted = spu_rlmask( oneLeft, shift ); - denomShifted = spu_rlmask( denomLeft, shift ); - - quot = spu_sel( newQuot, quot, skip ); - numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) ); - } - while ( anyCont ); - - res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg ); - res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos ); - return res; -} - diff --git a/Extras/simdmathlibrary/spu/fpclassifyd2.c b/Extras/simdmathlibrary/spu/fpclassifyd2.c deleted file mode 100644 index 99242a8e5..000000000 --- a/Extras/simdmathlibrary/spu/fpclassifyd2.c +++ /dev/null @@ -1,94 +0,0 @@ -/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved.
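divi4 above runs a textbook shift-and-subtract divider across all four lanes at once, with one refinement: instead of shifting a single bit per iteration, it jumps ahead by the leading-zero count of the remaining numerator. A one-lane scalar sketch of the plain version (hypothetical name; GCC-style __builtin_clz assumed):

#include <stdint.h>

/* Divide-by-zero yields quot = 0, rem = numer, matching divi4. */
static void udiv_sketch(uint32_t numer, uint32_t denom,
                        uint32_t *quot, uint32_t *rem)
{
    *quot = 0;
    *rem  = numer;
    if (denom == 0 || denom > numer)
        return;

    /* Align the divisor's leading one with the dividend's. */
    int shift = __builtin_clz(denom) - __builtin_clz(numer);
    uint32_t d   = denom << shift;
    uint32_t bit = 1u << shift;

    while (bit != 0) {
        if (numer >= d) { numer -= d; *quot |= bit; }
        d   >>= 1;
        bit >>= 1;
    }
    *rem = numer;
}

The vector version cannot early-out per lane, so it carries a per-lane "cont" mask and keeps iterating until every lane's quotient bit has been consumed.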
- - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include <math.h> - -#ifndef FP_NAN -#define FP_NAN (0) -#endif -#ifndef FP_INFINITE -#define FP_INFINITE (1) -#endif -#ifndef FP_ZERO -#define FP_ZERO (2) -#endif -#ifndef FP_SUBNORMAL -#define FP_SUBNORMAL (3) -#endif -#ifndef FP_NORMAL -#define FP_NORMAL (4) -#endif - -vector signed long long -fpclassifyd2 (vector double x) -{ - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull); - vec_ullong2 zero = spu_splats(0x0000000000000000ull); - - vec_ullong2 mask; - vec_llong2 classtype; - vec_uint4 cmpgt, cmpeq; - - //FP_NORMAL: normal unless nan, infinity, zero, or denorm - classtype = spu_splats((long long)FP_NORMAL); - - //FP_NAN: all-ones exponent and non-zero mantissa - cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); - cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); - mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), - spu_and( spu_shuffle( cmpeq, cmpeq, even ), - spu_shuffle( cmpgt, cmpgt, odd ) ) ); - classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask ); - - //FP_INFINITE: all-ones exponent and zero mantissa - mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); - classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask ); - - //FP_ZERO: zero exponent and zero mantissa - cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); - mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); - classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask ); - - //FP_SUBNORMAL: zero exponent and non-zero mantissa
- cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero ); - cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero ); - mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ), - spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) ); - classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask ); - - return classtype; -} diff --git a/Extras/simdmathlibrary/spu/frexpd2.c b/Extras/simdmathlibrary/spu/frexpd2.c deleted file mode 100644 index 3643c54b5..000000000 --- a/Extras/simdmathlibrary/spu/frexpd2.c +++ /dev/null @@ -1,95 +0,0 @@ -/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x' - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
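fpclassifyd2 above derives the class purely from the exponent and mantissa fields, pairing 32-bit compares with even/odd shuffles because the SPU has no native 64-bit compare. The scalar decision table it implements (a sketch reusing the fallback FP_* values defined above, not the library's API):

#include <stdint.h>
#include <string.h>

static int fpclassify_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t e = (bits >> 52) & 0x7ff;        /* 11-bit biased exponent */
    uint64_t m = bits & 0xfffffffffffffull;   /* 52-bit mantissa */

    if (e == 0x7ff) return m ? 0 : 1;         /* FP_NAN : FP_INFINITE */
    if (e == 0)     return m ? 3 : 2;         /* FP_SUBNORMAL : FP_ZERO */
    return 4;                                 /* FP_NORMAL */
}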
- */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include <math.h> - -#ifndef DBL_NAN -#define DBL_NAN ((long long)0x7FF8000000000000ull) -#endif - -vector double -frexpd2 (vector double x, vector signed long long *pexp) -{ - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - - vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull}; - - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 zero = spu_splats(0x0000000000000000ull); - - vec_ullong2 isnan, isinf, iszero; - vec_ullong2 e0, x0, x1; - vec_uint4 cmpgt, cmpeq, cmpzr; - vec_int4 lz, lz0, sh, ex; - vec_double2 fr, frac = (vec_double2)zero; - - //NAN: x is NaN (all-ones exponent and non-zero mantissa) - cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) ); - isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even ); - frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan ); - - //INF: x is infinite (all-ones exponent and zero mantissa) - isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); - frac = spu_sel( frac, x , isinf ); - - //x is zero (zero exponent and zero mantissa) - cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); - iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); - - frac = spu_sel( frac, (vec_double2)zero , iszero ); - *pexp = spu_sel( *pexp, (vec_llong2)zero , iszero ); - - //Integer Exponent: if x is normal or subnormal - - //...shift left to normalize fraction, zero shift if normal - lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); - lz0 = (vec_int4)spu_shuffle( lz, lz, even ); - sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) ); - sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) ); - - x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) ); - x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) ); - fr = (vec_double2)spu_sel( x1, x0, maskdw ); - fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn ); - fr = spu_sel( fr, x, sign ); - - e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 ); - ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) ); - - frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) ); - *pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) ); - - return frac; -} diff --git a/Extras/simdmathlibrary/spu/ilogbd2.c b/Extras/simdmathlibrary/spu/ilogbd2.c deleted file mode 100644 index 28b390f7d..000000000 --- a/Extras/simdmathlibrary/spu/ilogbd2.c +++ /dev/null @@ -1,84 +0,0 @@ -/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0 - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved.
- - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include <math.h> - -#ifndef FP_ILOGB0 -#define FP_ILOGB0 ((int)0x80000001) -#endif -#ifndef FP_ILOGBNAN -#define FP_ILOGBNAN ((int)0x7FFFFFFF) -#endif - -vector signed long long -ilogbd2 (vector double x) -{ - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 zero = spu_splats(0x0000000000000000ull); - - vec_ullong2 isnan, iszeroinf; - vec_llong2 ilogb = (vec_llong2)zero; - vec_llong2 e1, e2; - vec_uint4 cmpgt, cmpeq, cmpzr; - vec_int4 lz, lz0, lz1; - - //FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa) - cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), - spu_and( spu_shuffle( cmpeq, cmpeq, even ), - spu_shuffle( cmpgt, cmpgt, odd ) ) ); - ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan ); - - //FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa) - cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); - iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ), - spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) ); - ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf ); - - //Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x - e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); - e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 ); - - lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); - lz0 
= (vec_int4)spu_shuffle( lz, lz, even ); - lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); - lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); - - ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) ); - - return ilogb; -} diff --git a/Extras/simdmathlibrary/spu/lldiv.h b/Extras/simdmathlibrary/spu/lldiv.h deleted file mode 100644 index f3c990df4..000000000 --- a/Extras/simdmathlibrary/spu/lldiv.h +++ /dev/null @@ -1,123 +0,0 @@ -/* Common functions for lldivi2/lldivu2 - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
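ilogbd2 above returns the unbiased exponent e - 1023 for normal inputs; for subnormals it first normalizes by counting leading zeros of the mantissa, which is what the lz0/lz1 arithmetic computes. A scalar equivalent (sketch; finite non-zero inputs only, since zero/inf/NaN take the FP_ILOGB0/FP_ILOGBNAN paths):

#include <stdint.h>
#include <string.h>

static int ilogb_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int      e = (int)((bits >> 52) & 0x7ff);
    uint64_t m = bits & 0xfffffffffffffull;

    if (e != 0)
        return e - 1023;               /* normal: unbiased exponent */

    /* Subnormal: each shift toward the implicit-one position (bit 52)
       lowers the exponent one step below -1022; the smallest double
       2^-1074 therefore yields -1074. */
    int n = -1022;
    while (!(m & (1ull << 52))) { m <<= 1; n--; }
    return n;
}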
- */ - -#ifndef __LLDIV_H__ -#define __LLDIV_H__ - -#include <spu_intrinsics.h> - -static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x); -static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count); -static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count); -static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x); -static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y); -static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y); - -static inline vector unsigned long long -ll_spu_cntlz(vector unsigned long long x) -{ - vec_uint4 cnt; - - cnt = spu_cntlz((vec_uint4)x); - cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4))); - cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11})); - - return (vec_ullong2)cnt; -} - -static inline vector unsigned long long -ll_spu_sl(vector unsigned long long x, vector unsigned long long count) -{ - vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; - vec_ullong2 x_upper, x_lower; - - // shift upper word - x_upper = spu_and(x, mask); - x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1)); - x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1)); - - // shift lower word - x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3)); - x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3)); - - return spu_sel(x_lower, x_upper, mask); -} - -static inline vector unsigned long long -ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count) -{ - vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; - vec_ullong2 x_upper, x_lower; - vec_uint4 cnt_byte; - - cnt_byte = spu_add((vec_uint4)count, 7); - - // shift upper word - x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1)); - x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1)); - - // shift lower word - x_lower = spu_andc(x, mask); - x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3)); - x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3)); - - return spu_sel(x_lower, x_upper, mask); -} - -static inline vector unsigned long long -ll_spu_cmpeq_zero(vector unsigned long long x) -{ - vec_uint4 cmp; - - cmp = spu_cmpeq((vec_uint4)x, 0); - return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}))); -} - -static inline vector unsigned long long -ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y) -{ - vec_uint4 gt; - - gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y); - gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y)); - return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11})); -} - -static inline vector unsigned long long -ll_spu_sub(vector unsigned long long x, vector unsigned long long y) -{ - vec_uint4 borrow; - - borrow = spu_genb((vec_uint4)x, (vec_uint4)y); - borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); - return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow); -} - -#endif // __LLDIV_H__ - diff --git a/Extras/simdmathlibrary/spu/lldivi2.c b/Extras/simdmathlibrary/spu/lldivi2.c deleted file mode 100644 index a24ff5f5d..000000000 --- 
a/Extras/simdmathlibrary/spu/lldivi2.c +++ /dev/null @@ -1,128 +0,0 @@ -/* lldivi2 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include "lldiv.h" - -static inline vector signed long long _negatell2 (vector signed long long x); - -static inline vector signed long long -_negatell2 (vector signed long long x) -{ - vector signed int zero = (vector signed int){0,0,0,0}; - vector signed int borrow; - - borrow = spu_genb(zero, (vec_int4)x); - borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); - return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow); -} - -// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of -// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator. - -lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom) -{ - lldivi2_t res; - vec_ullong2 numerAbs, denomAbs; - vec_uint4 numerPos, denomPos, quotNeg; - - vec_uint4 denomZeros, numerZeros; - vec_int4 shift; - vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft; - vec_ullong2 quot, newQuot; - vec_ullong2 newNum, skip, cont; - int anyCont; - - // Determine whether result needs sign change - - numerPos = spu_cmpgt((vec_int4)numer, -1); - numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); - denomPos = spu_cmpgt((vec_int4)denom, -1); - denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); - quotNeg = spu_xor( numerPos, denomPos ); - - // Use absolute values of numerator, denominator - - numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos); - denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos); - - // Get difference of leading zeros.
- - denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs ); - numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs ); - - shift = (vec_int4)spu_sub( denomZeros, numerZeros ); - - // Shift denom to align leading one with numerator's - - denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift ); - oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift ); - oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) ); - - // Shift left all leading zeros. - - denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros ); - oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros ); - - quot = spu_splats(0ull); - - do - { - cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) ); - anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 ); - - newQuot = spu_or( quot, oneShifted ); - - // Subtract shifted denominator from remaining numerator - // when denominator is not greater. - - skip = ll_spu_cmpgt( denomShifted, numerAbs ); - newNum = ll_spu_sub( numerAbs, denomShifted ); - - // If denominator is greater, next shift is one more, otherwise - // next shift is number of leading zeros of remaining numerator. - - numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip ); - shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros ); - - oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift ); - denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift ); - - quot = spu_sel( newQuot, quot, skip ); - numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) ); - } - while ( anyCont ); - - res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg); - res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos); - - return res; -} - diff --git a/Extras/simdmathlibrary/spu/logbd2.c b/Extras/simdmathlibrary/spu/logbd2.c deleted file mode 100644 index d566eeb3f..000000000 --- a/Extras/simdmathlibrary/spu/logbd2.c +++ /dev/null @@ -1,93 +0,0 @@ -/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
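The ll_spu_* helpers in lldiv.h above synthesize 64-bit shifts, compares, and subtracts out of 32-bit SPU primitives; ll_spu_sub, for instance, generates a per-word borrow with spu_genb and applies it with spu_subx. A scalar model of that subtraction (a sketch; the SPU encodes the flag in the opposite sense, so an explicit borrow is used here):

#include <stdint.h>

/* 64-bit subtract assembled from 32-bit halves, modelling ll_spu_sub. */
static uint64_t sub64_sketch(uint32_t x_hi, uint32_t x_lo,
                             uint32_t y_hi, uint32_t y_lo)
{
    uint32_t lo     = x_lo - y_lo;
    uint32_t borrow = (x_lo < y_lo);     /* borrow out of the low word */
    uint32_t hi     = x_hi - y_hi - borrow;
    return ((uint64_t)hi << 32) | lo;
}

With these helpers in place, lldivi2 below is the same shift-and-subtract loop as divi4, just run on two 64-bit lanes instead of four 32-bit ones.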
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> -#include <math.h> - -#ifndef HUGE_VALL -#define HUGE_VALL __builtin_huge_vall () -#endif - -#ifndef DBL_INF -#define DBL_INF ((long long)0x7FF0000000000000ull) -#endif - -#ifndef DBL_NAN -#define DBL_NAN ((long long)0x7FF8000000000000ull) -#endif - -vector double -logbd2 (vector double x) -{ - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 zero = spu_splats(0x0000000000000000ull); - - vec_ullong2 isnan, isinf, iszero; - vec_double2 logb = (vec_double2)zero; - vec_llong2 e1, e2; - vec_uint4 cmpgt, cmpeq, cmpzr; - vec_int4 lz, lz0, lz1; - - //NAN: x is NaN (all-ones exponent and non-zero mantissa) - cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); - isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), - spu_and( spu_shuffle( cmpeq, cmpeq, even ), - spu_shuffle( cmpgt, cmpgt, odd ) ) ); - logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan ); - - //INF: x is infinite (all-ones exponent and zero mantissa) - isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); - logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf ); - - //HUGE_VAL: x is zero (zero exponent and zero mantissa) - cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); - iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); - logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero ); - - //Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x - e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); - e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20); - - lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); - lz0 = (vec_int4)spu_shuffle( lz, lz, even ); - lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); - lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); - - logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ), - spu_nor( isnan, spu_or( isinf, iszero ) ) ); - - return logb; -} diff --git a/Extras/simdmathlibrary/spu/nextafterd2.c b/Extras/simdmathlibrary/spu/nextafterd2.c deleted file mode 100644 index de43dd4ec..000000000 --- a/Extras/simdmathlibrary/spu/nextafterd2.c +++ /dev/null @@ -1,92 +0,0 @@ -/* nextafterd2 - find next representable floating-point value towards 2nd param.
- Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include <simdmath.h> -#include <spu_intrinsics.h> - -vector double -nextafterd2 (vector double xx, vector double yy) -{ - vec_uint4 abs_x, abs_y, sign_x, abs_dif; - vec_uint4 is_sub, is_zerox, is_zeroy; - vec_uint4 is_equal, is_infy, is_nany; - vec_uint4 res0, res1, res; - vec_uint4 vec_zero = ((vec_uint4){0,0,0,0}); - vec_uint4 vec_one = ((vec_uint4){0,1,0,1}); - vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1}); - vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0}); - vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}); - vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}); - - // mask sign bit - abs_x = spu_and( (vec_uint4)xx, msk_abs); - abs_y = spu_and( (vec_uint4)yy, msk_abs); - - is_zerox = spu_cmpeq( abs_x, vec_zero); - is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq)); - - // -0 exception - sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0})); - sign_x = spu_sel(sign_x, vec_zero, is_zerox); - - // if same sign |y| < |x| -> decrease - abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4)); - is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease - is_sub = spu_or(is_sub, abs_dif); - is_sub = spu_rlmaska(is_sub, -31); - is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8})); - - res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase - res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease - res = spu_sel( res0, res1, is_sub); // select increase or decrease - res = spu_or( res, sign_x); // set sign - - // check exception - // 0 -> -1 - res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub)); - - // check equal (include 0,-0) - is_zeroy = spu_cmpeq( abs_y, vec_zero); - is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq)); - is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy); - is_equal = spu_and(is_equal, 
spu_shuffle(is_equal,is_equal,msk_all_eq)); - is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox)); - res = spu_sel(res, (vec_uint4)yy, is_equal); - - // check nan - is_infy = spu_cmpeq( abs_y, msk_exp); - is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq)); - is_nany = spu_and( abs_y, msk_exp); - is_nany = spu_cmpeq( is_nany, msk_exp); - is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq)); - is_nany = spu_sel( is_nany, vec_zero, is_infy); - res = spu_sel(res, (vec_uint4)yy, is_nany); - - return (vec_double2)res; -} diff --git a/Extras/simdmathlibrary/spu/powf4.c b/Extras/simdmathlibrary/spu/powf4.c deleted file mode 100644 index 98c57a131..000000000 --- a/Extras/simdmathlibrary/spu/powf4.c +++ /dev/null @@ -1,72 +0,0 @@ -/* powf4 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include -#include - -vector float -powf4 (vector float x, vector float y) -{ - vec_int4 zeros = spu_splats((int)0); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x); - - vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); - - vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); - vec_float4 absx = spu_andc(x, sbit); - vec_float4 absy = spu_andc(y, sbit); - vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001)); - negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros)); - - vec_float4 res = exp2f4(spu_mul(y, log2f4(absx))); - res = spu_sel(res, spu_or(sbit, res), negmask); - - - return spu_sel(res, (vec_float4)zeros, zeromask); -} - -/* -{ - vec_int4 zeros = spu_splats(0); - vec_int4 ones = (vec_int4)spu_splats((char)0xFF); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x); - vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y); - vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x); - vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); - vec_float4 absx = spu_andc(x, sbit); - vec_float4 absy = spu_andc(y, sbit); - vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001)); - negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros)); - - - -} - -*/ diff --git a/Extras/simdmathlibrary/spu/remainderd2.c b/Extras/simdmathlibrary/spu/remainderd2.c deleted file mode 100644 index e44bb8868..000000000 --- a/Extras/simdmathlibrary/spu/remainderd2.c +++ /dev/null @@ -1,313 +0,0 @@ -/* A vector double is returned that contains the remainder xi REM yi, - for the corresponding elements of vector double x and vector double y. - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include - - -static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _twice(vec_uint4 aa); - -vector double -remainderd2(vector double x, vector double yy) -{ - vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); - vec_uint4 y_hi; - vec_uint4 abs_x, abs_yy, abs_2x, abs_2y; - vec_uint4 bias; - vec_uint4 nan_out, overflow; - vec_uint4 result; - vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); - vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); - vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); - vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); - vec_uint4 vec_zero = spu_splats((unsigned int)0); - vec_uint4 is_zeroy; - - // cut sign - abs_x = spu_andc((vec_uint4)x, sign_mask); - abs_yy = spu_andc((vec_uint4)yy, sign_mask); - y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); - - - // check nan out - is_zeroy = spu_cmpeq(abs_yy, vec_zero); - is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); - nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 - nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 - nan_out = spu_or(nan_out, is_zeroy); // y = 0 - nan_out = spu_shuffle(nan_out, nan_out, splat_hi); - - - // make y x2 - abs_2y = _twice(abs_yy); // 2 x y - - /* - * use fmodd2 function - */ - // get remainder of y x2 -// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y); - { - vec_double2 y = (vec_double2)abs_2y; - - int shiftx0, shiftx1, shifty0, shifty1; - vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}); - vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192}); -// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); - vec_int4 n, shift; - vec_uint4 exp_x, exp_y; -// , sign; -// vec_uint4 abs_x, abs_y; - vec_uint4 abs_y; - vec_uint4 mant_x, mant_x0, mant_x1; - vec_uint4 mant_y, mant_y0, mant_y1; - vec_uint4 mant_0, mant_1; - vec_uint4 mant_r, mant_l; -// vec_uint4 result; - vec_uint4 result0, resultx; - vec_uint4 zero_x, zero_y; - vec_uint4 denorm_x, denorm_y; - vec_uint4 cnt, cnt_x, cnt_y; - vec_uint4 shift_x, shift_y; - vec_uint4 adj_x, adj_y; - vec_uint4 z, borrow, mask; - vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL)); -// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); - vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL)); - vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)); -// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); - vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1}); -// vec_uint4 vec_zero = spu_splats((unsigned int)0); - -// sign = spu_and( (vec_uint4)x, sign_mask); -// abs_x = spu_andc((vec_uint4)x, sign_mask); - abs_y = spu_andc((vec_uint4)y, sign_mask); - exp_x = spu_rlmask(abs_x, -20); - exp_y = spu_rlmask(abs_y, -20); - // get shift count for denorm - cnt_x = spu_cntlz(abs_x); - cnt_y = spu_cntlz(abs_y); - cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32))); - cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32))); - - zero_x = spu_cmpgt(cnt_x, 63); // zero ? - zero_y = spu_cmpgt(cnt_y, 63); // zero ? 
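A note on the spu_cntlz/spu_add pairs above: the SPU counts leading zeros only per 32-bit word, so a 64-bit count is assembled by adding the low word's count whenever the high word's count saturates at 32. A scalar sketch of the same idiom; clz32 is a stand-in helper written out here, not part of the library:

    static unsigned int clz32(unsigned int x)
    {
        unsigned int n = 0;                     /* count leading zero bits */
        while (n < 32 && !(x & 0x80000000u)) { x <<= 1; ++n; }
        return n;                               /* 32 when x == 0 */
    }

    static unsigned int clz64_from_halves(unsigned int hi, unsigned int lo)
    {
        unsigned int cnt = clz32(hi);
        if (cnt == 32)                          /* high word all zero */
            cnt += clz32(lo);                   /* continue into the low word */
        return cnt;                             /* 64 means the whole value is zero */
    }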
- result0 = spu_or(zero_x, zero_y); - result0 = spu_shuffle(result0, result0, splat_hi); - - // 0 - (cnt_x - 11) = 11 - cnt_x - shift_x= spu_add(cnt_x, -11); - shift_y= spu_add(cnt_y, -11); - cnt_x = spu_sub(11, cnt_x); - cnt_y = spu_sub(11, cnt_y); - - // count to normalize - adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0)); - adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0)); - adj_x = spu_shuffle(adj_x, adj_x, splat_hi); - adj_y = spu_shuffle(adj_y, adj_y, splat_hi); - - // for denorm - shiftx0 = spu_extract(shift_x, 0); - shiftx1 = spu_extract(shift_x, 2); - shifty0 = spu_extract(shift_y, 0); - shifty1 = spu_extract(shift_y, 2); - mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0); - mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0); - mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1); - mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1); - mant_x = spu_sel(mant_x0, mant_x1, merge_sel); - mant_y = spu_sel(mant_y0, mant_y1, merge_sel); - - denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x); - denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y); - mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x); - mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y); - mant_x = spu_or(mant_x, implied_1); // hidden bit - mant_y = spu_or(mant_y, implied_1); // hidden bit - - // x < y ? - resultx = _vec_gt64(abs_y, abs_x); - - n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y); - mask = spu_cmpgt(n, 0); - mask = spu_andc(mask, resultx); - - while (spu_extract(spu_gather(mask), 0)) { - borrow = spu_genb(mant_x, mant_y); - borrow = spu_shuffle(borrow, borrow, propagate); - z = spu_subx(mant_x, mant_y, borrow); - - result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0); - - mant_x = spu_sel(mant_x, - spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)), - mask); - - n = spu_add(n, -1); - mask = spu_cmpgt(n, 0); - } - - borrow = spu_genb(mant_x, mant_y); - borrow = spu_shuffle(borrow, borrow, propagate); - z = spu_subx(mant_x, mant_y, borrow); - mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)); - result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0); - - // bring back to original range - mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0})); - mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1})); - - // for adj_y < 0 exp max=1 - shiftx0 = spu_extract(adj_y, 0); - shiftx1 = spu_extract(adj_y, 2); - mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0); - mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1); - mant_r = spu_sel(mant_x0, mant_x1, merge_sel); - - // for adj_y >= 0 - cnt = spu_cntlz(mant_x); - cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32))); - cnt = spu_add(cnt, -11); - cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1 - shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y)); - shiftx0 = spu_extract(shift, 0); - shiftx1 = spu_extract(shift, 2); - mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0); - mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1); - mant_l = spu_sel(mant_x0, mant_x1, merge_sel); - cnt = spu_sub(adj_y, (vec_uint4)shift); - mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask)); - - result = spu_sel(mant_l, mant_r, 
denorm_y); - result = spu_sel(result, vec_zero, result0); // remainder 0 - result = spu_sel(result, abs_x, resultx); // x < y -// result = spu_xor(result, sign); // set sign - -// return ((vec_double2)result); - } - - -// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); - abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF))); - - /* if (2*x > y) - * x -= y - * if (2*x >= y) x -= y - */ - overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); - // make x2 - abs_2x = _twice(abs_x); // 2 x x - - bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy - bias = spu_andc(bias, overflow); - - abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); - - - overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus - - // make x2 - abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (negative input not supported) - bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31)); - bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); - abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); - - /* select final answer - */ - result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign - result = spu_sel(result, val_nan, nan_out); // if nan - - return ((vec_double2)result); -} - -/* - * subtraction function under limited conditions - */ -static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb) -{ - // which is bigger input aa or bb - vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa - - // need denorm calc ? - vec_uint4 norm_a, norm_b; - norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); - norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); - norm_a = spu_and(norm_a, norm_b); - norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); - - // calc (aa - bb) and (bb - aa) - vec_uint4 res_a, res_b, res; - vec_uint4 borrow_a, borrow_b; - vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192}); - borrow_a = spu_genb(aa, bb); - borrow_b = spu_genb(bb, aa); - borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b); - borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b); - res_a = spu_subx(aa, bb, borrow_a); - res_b = spu_subx(bb, aa, borrow_b); - res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign - - res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa) - // select normal calc or special - res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a); - - return res; -} - - -/* - * extend spu_cmpgt function to 64bit data - */ -static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb) -{ - vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb - vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb - return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right -} -static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb) -{ - vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right - return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); -} - -/* - * double formatted x2 - */ -static inline vec_uint4 _twice(vec_uint4 aa) -{ - vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0 - norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); - - // if denorm or zero: << 1; if norm: exp + 1 - return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
-} diff --git a/Extras/simdmathlibrary/spu/remainderf4.c b/Extras/simdmathlibrary/spu/remainderf4.c deleted file mode 100644 index ffbea65c1..000000000 --- a/Extras/simdmathlibrary/spu/remainderf4.c +++ /dev/null @@ -1,107 +0,0 @@ -/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y. - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -// -// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. -// If x == 0, the result is 0. -// If x != 0 and y == 0, the result is undefined. -vector float -remainderf4 (vector float x, vector float y) -{ - vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf; - vec_int4 qi0, qi1, qi2; - vec_float4 i0, i1, i2, i, rem; - vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd; - - // Find i = truncated_integer(|x/y|) - - // By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0. - // Thus the exact truncation is either the truncated quotient, one less, or one greater. - - q = divf4( x, y ); - xabs = fabsf4( x ); - yabs = fabsf4( y ); - qabs = fabsf4( q ); - xabs2 = spu_add( xabs, xabs ); - - inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); - inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); - - qi1 = spu_convts( qabs, 0 ); - qi0 = spu_add( qi1, -1 ); - qi2 = spu_add( qi1, 1 ); - - odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 ); - odd0 = odd2 = spu_nor( odd1, odd1 ); - - i0 = spu_convtf( qi0, 0 ); - i1 = spu_convtf( qi1, 0 ); - i2 = spu_convtf( qi2, 0 ); - - // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as - // 2*|x| - i*|y| >= |x|: - // - // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. - // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed - // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. 
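In scalar terms, the selection below keeps the largest candidate i among q-1, q, q+1 whose remainder is still non-negative, testing 2|x| - i*|y| >= |x| for the rounding reason given above. A sketch of that decision, illustrative only (the vector code performs it on four slots at once, using exact fused negative-multiply-subtracts):

    static float trunc_quotient(float xabs, float yabs, float qabs)
    {
        float i = (float)(int)qabs - 1.0f;              /* candidate one below */
        if (2.0f * xabs - (i + 1.0f) * yabs >= xabs)    /* next candidate still valid? */
            i += 1.0f;
        if (2.0f * xabs - (i + 1.0f) * yabs >= xabs)
            i += 1.0f;
        return i;   /* trunc(|x/y|), given the quotient estimate is off by at most 1 */
    }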
- - cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) ); - cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) ); - - i = i0; - i = spu_sel( i1, i, cmp1 ); - i = spu_sel( i2, i, cmp2 ); - - odd = odd0; - odd = spu_sel( odd1, odd, cmp1 ); - odd = spu_sel( odd2, odd, cmp2 ); - - rem = spu_nmsub( i, yabs, xabs ); - - // Test whether i or i+1 = nearest_integer(|x/y|) - // - // i+1 is correct if: - // - // rem > 0.5*|y| - // or - // rem = 0.5*|y| and i is odd - - yabshalf = spu_mul( yabs, spu_splats(0.5f) ); - cmp1 = spu_cmpgt( rem, yabshalf ); - cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd ); - - i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) ); - i = copysignf4( i, q ); - - return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); -} - diff --git a/Extras/simdmathlibrary/spu/remquod2.c b/Extras/simdmathlibrary/spu/remquod2.c deleted file mode 100644 index 745fd2931..000000000 --- a/Extras/simdmathlibrary/spu/remquod2.c +++ /dev/null @@ -1,356 +0,0 @@ -/* remquod2 - - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ -#include -#include - -/* - * This function returns the same vector double result as remainderd2(). - * In addition, a vector signed long long is stored in *quo, - * which contains the corresponding element values whose sign is - * the sign of xi / yi and whose magnitude is congruent modulo 2^n to - * the magnitude of the integral quotient of xi / yi, where n is - * an implementation-defined integer greater than or equal to 3.
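 * For a scalar picture: with x = 7.0 and y = 2.0, the nearest integer
 * to 7/2 = 3.5 is 4 (ties round to even), so the remainder returned is
 * 7 - 4*2 = -1.0 and 4 is stored in the corresponding slot of *quo.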
- */ - -static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb); -static inline vec_uint4 _twice(vec_uint4 aa); - -vector double -remquod2(vector double x, vector double yy, vector signed long long *quo) -{ - vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); - vec_int4 quotient, quotient0; - vec_uint4 y_hi; - vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y; - vec_uint4 bias; - vec_uint4 nan_out, not_ge, quo_pos, overflow; - vec_uint4 result; - vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); - vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); - vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); - vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); - vec_uint4 vec_zero = spu_splats((unsigned int)0); - vec_uint4 is_zeroy; - - // cut sign - abs_x = spu_andc((vec_uint4)x, sign_mask); - abs_yy = spu_andc((vec_uint4)yy, sign_mask); - y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); - - quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1); - quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi); - - // check nan out - is_zeroy = spu_cmpeq(abs_yy, vec_zero); - is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); - nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 - nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 - nan_out = spu_or(nan_out, is_zeroy); // y = 0 - nan_out = spu_shuffle(nan_out, nan_out, splat_hi); - - - // make y x8 - abs_2y = _twice(abs_yy); // 2 x y - abs_4y = _twice(abs_2y); // 4 x y - abs_8y = _twice(abs_4y); // 8 x y - - /* - * use fmodd2 function - */ - // get remainder of y x8 -// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y); - { - vec_double2 y = (vec_double2)abs_8y; - - int shiftx0, shiftx1, shifty0, shifty1; - vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}); - vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192}); -// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); - vec_int4 n, shift; - vec_uint4 exp_x, exp_y; -// , sign; -// vec_uint4 abs_x, abs_y; - vec_uint4 abs_y; - vec_uint4 mant_x, mant_x0, mant_x1; - vec_uint4 mant_y, mant_y0, mant_y1; - vec_uint4 mant_0, mant_1; - vec_uint4 mant_r, mant_l; -// vec_uint4 result; - vec_uint4 result0, resultx; - vec_uint4 zero_x, zero_y; - vec_uint4 denorm_x, denorm_y; - vec_uint4 cnt, cnt_x, cnt_y; - vec_uint4 shift_x, shift_y; - vec_uint4 adj_x, adj_y; - vec_uint4 z, borrow, mask; - vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL)); -// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); - vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL)); - vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)); -// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); - vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1}); -// vec_uint4 vec_zero = spu_splats((unsigned int)0); - -// sign = spu_and( (vec_uint4)x, sign_mask); -// abs_x = spu_andc((vec_uint4)x, sign_mask); - abs_y = spu_andc((vec_uint4)y, sign_mask); - exp_x = spu_rlmask(abs_x, -20); - exp_y = spu_rlmask(abs_y, -20); - // get shift count for denorm - cnt_x = spu_cntlz(abs_x); - cnt_y = spu_cntlz(abs_y); - cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
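The adj_x/adj_y values computed next act as normalization exponents: the biased exponent minus one for a normal input, and a negative left-shift count for a denormal whose mantissa must first be slid up to the implied-one position. A scalar sketch of that normalization for finite inputs (hypothetical helper, one double at a time):

    static void normalize_double(unsigned long long bits,  /* |x| bit pattern */
                                 int *adj, unsigned long long *mant)
    {
        int exp = (int)((bits >> 52) & 0x7FF);
        unsigned long long frac = bits & 0x000FFFFFFFFFFFFFULL;
        if (exp != 0) {                       /* normal: implied leading 1 */
            *adj  = exp - 1;
            *mant = frac | 0x0010000000000000ULL;
        } else {                              /* denormal: shift into place */
            int shift = 0;
            while (shift < 52 && !(frac & 0x0010000000000000ULL)) {
                frac <<= 1;
                ++shift;
            }
            *adj  = -shift;                   /* negative adjustment */
            *mant = frac;                     /* zero input handled separately */
        }
    }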
- cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32))); - - zero_x = spu_cmpgt(cnt_x, 63); // zero ? - zero_y = spu_cmpgt(cnt_y, 63); // zero ? - result0 = spu_or(zero_x, zero_y); - result0 = spu_shuffle(result0, result0, splat_hi); - - // 0 - (cnt_x - 11) = 11 - cnt_x - shift_x= spu_add(cnt_x, -11); - shift_y= spu_add(cnt_y, -11); - cnt_x = spu_sub(11, cnt_x); - cnt_y = spu_sub(11, cnt_y); - - // count to normalize - adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0)); - adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0)); - adj_x = spu_shuffle(adj_x, adj_x, splat_hi); - adj_y = spu_shuffle(adj_y, adj_y, splat_hi); - - // for denorm - shiftx0 = spu_extract(shift_x, 0); - shiftx1 = spu_extract(shift_x, 2); - shifty0 = spu_extract(shift_y, 0); - shifty1 = spu_extract(shift_y, 2); - mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0); - mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0); - mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1); - mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1); - mant_x = spu_sel(mant_x0, mant_x1, merge_sel); - mant_y = spu_sel(mant_y0, mant_y1, merge_sel); - - denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x); - denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y); - mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x); - mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y); - mant_x = spu_or(mant_x, implied_1); // hidden bit - mant_y = spu_or(mant_y, implied_1); // hidden bit - - // x < y ? - resultx = _vec_gt64(abs_y, abs_x); - - n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y); - mask = spu_cmpgt(n, 0); - mask = spu_andc(mask, resultx); - - while (spu_extract(spu_gather(mask), 0)) { - borrow = spu_genb(mant_x, mant_y); - borrow = spu_shuffle(borrow, borrow, propagate); - z = spu_subx(mant_x, mant_y, borrow); - - result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0); - - mant_x = spu_sel(mant_x, - spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)), - mask); - - n = spu_add(n, -1); - mask = spu_cmpgt(n, 0); - } - - borrow = spu_genb(mant_x, mant_y); - borrow = spu_shuffle(borrow, borrow, propagate); - z = spu_subx(mant_x, mant_y, borrow); - mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)); - result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0); - - // bring back to original range - mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0})); - mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1})); - - // for adj_y < 0 exp max=1 - shiftx0 = spu_extract(adj_y, 0); - shiftx1 = spu_extract(adj_y, 2); - mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0); - mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1); - mant_r = spu_sel(mant_x0, mant_x1, merge_sel); - - // for adj_y >= 0 - cnt = spu_cntlz(mant_x); - cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32))); - cnt = spu_add(cnt, -11); - cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1 - shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y)); - shiftx0 = spu_extract(shift, 0); - shiftx1 = spu_extract(shift, 2); - mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0); - mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1); - mant_l = 
spu_sel(mant_x0, mant_x1, merge_sel); - cnt = spu_sub(adj_y, (vec_uint4)shift); - mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask)); - - result = spu_sel(mant_l, mant_r, denorm_y); - result = spu_sel(result, vec_zero, result0); // remainder 0 - result = spu_sel(result, abs_x, resultx); // x < y -// result = spu_xor(result, sign); // set sign - -// return ((vec_double2)result); - } - - // if y (x8->exp+3 7FF-7FC) overflow -// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); - abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); - - /* if (x >= 4*y) - * x -= 4*y - * quotient = 4 - * else - * quotient = 0 - */ - overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF)); - - not_ge = _vec_gt64(abs_4y, abs_x); - not_ge = spu_or(not_ge, overflow); - abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge); - quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge); - - /* if (x >= 2*y) - * x -= 2*y - * quotient += 2 - */ - overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF)); - - not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x - not_ge = spu_or(not_ge, overflow); - - abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge); - quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge); - - /* if (2*x > y) - * x -= y - * if (2*x >= y) x -= y - */ - overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); - // make x2 - abs_2x = _twice(abs_x); // 2 x x - - bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy - bias = spu_andc(bias, overflow); - - abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); - quotient = spu_sub(quotient, (vec_int4)bias); - - overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus - - // make x2 - abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (negative input not supported) - bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31)); - bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); - abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias); - quotient = spu_sub(quotient, (vec_int4)bias); - - /* select final answer - */ - result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign - result = spu_sel(result, val_nan, nan_out); // if nan - - quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3 bits - quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4)); - quotient = spu_sel(quotient0, quotient, quo_pos); - - *quo = (vec_llong2)quotient; - - return ((vec_double2)result); -} - -/* - * subtraction function under limited conditions - */ -static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb) -{ - // which is bigger input aa or bb - vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa - - // need denorm calc ?
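 // A plain double subtract is exact here only when both operands are
 // normal; the integer borrow path below covers denormal operands,
 // which SPU double arithmetic would flush to zero.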
- vec_uint4 norm_a, norm_b; - norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); - norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); - norm_a = spu_and(norm_a, norm_b); - norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); - - // calc (aa - bb) and (bb - aa) - vec_uint4 res_a, res_b, res; - vec_uint4 borrow_a, borrow_b; - vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192}); - borrow_a = spu_genb(aa, bb); - borrow_b = spu_genb(bb, aa); - borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b); - borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b); - res_a = spu_subx(aa, bb, borrow_a); - res_b = spu_subx(bb, aa, borrow_b); - res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign - - res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa) - // select normal calc or special - res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a); - - return res; -} - - -/* - * extend spu_cmpgt function to 64bit data - */ -static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb) -{ - vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb - vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb - return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right -} -static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb) -{ - vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right - return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); -} - -/* - * double formatted x2 - */ -static inline vec_uint4 _twice(vec_uint4 aa) -{ - vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0 - norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); - - // if denorm or zero: << 1; if norm: exp + 1 - return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2 -} diff --git a/Extras/simdmathlibrary/spu/rsqrtd2.c b/Extras/simdmathlibrary/spu/rsqrtd2.c deleted file mode 100644 index 05b2599e3..000000000 --- a/Extras/simdmathlibrary/spu/rsqrtd2.c +++ /dev/null @@ -1,96 +0,0 @@ -/* rsqrtd2 - for each of two double slots, compute reciprocal square root. - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -// -// Handles exceptional values as follows: -// NaN -> NaN -// (+,-)0 -> (+,-)Inf -// +Inf -> +0 -// -Inf -> NaN -// -Finite -> NaN -// Denormal inputs are treated as zero. - -vector double rsqrtd2 (vector double x) -{ - vec_ullong2 expmask, onemask, signmask, evenexp; - vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result; - vec_float4 halff, onef, manf, y0f, y1f; - - expmask = spu_splats(0x7ff0000000000000ull); - onemask = spu_splats(0x0010000000000000ull); - signmask = spu_splats(0x8000000000000000ull); - onef = spu_splats(1.0f); - one = spu_extend( onef ); - halff = spu_splats(0.5f); - half = spu_extend( halff ); - - // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) ) - // where i = 0 when exponent is even and i = 1 when exponent is odd. - // - // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2: - // - // biased_exp = 1023 + exponent - // new_biased_exp = 1023 - (exponent+i)/2 - // = 1023 - (biased_exp-1023+i)/2 - // = (3069 - (biased_exp+i)) / 2 - - evenexp = spu_and( (vec_ullong2)x, onemask ); - man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask ); - - exp = spu_and( x, (vec_double2)expmask ); - nexp = spu_or( exp, (vec_double2)onemask ); - nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 ); - - // Compute mantissa part in single precision. - // Convert back to double and multiply with 2^(-(exponent+i)/2), then - // do two Newton-Raphson steps for full precision. - - manf = spu_roundtf( man ); - y0f = spu_rsqrte( manf ); - y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f ); - y1 = spu_mul( spu_extend( y1f ), nexp ); - y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 ); - y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 ); - - // Choose iterated result or special value. - - zero = spu_and( x, (vec_double2)signmask ); - inf = spu_sel( (vec_double2)expmask, x, signmask ); - nan = (vec_double2)spu_splats(0x7ff8000000000000ull); - - result = spu_sel( y3, zero, isinfd2 ( x ) ); - result = spu_sel( result, nan, signbitd2 ( x ) ); - result = spu_sel( result, inf, is0denormd2 ( x ) ); - - return result; -} - diff --git a/Extras/simdmathlibrary/spu/simdmath/_lldiv.h b/Extras/simdmathlibrary/spu/simdmath/_lldiv.h new file mode 100644 index 000000000..b3d8a2baf --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/_lldiv.h @@ -0,0 +1,116 @@ +/* Common functions for lldivi2/lldivu2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_LLDIV_H___ +#define ___SIMD_MATH_LLDIV_H___ + +#include + +static inline vector unsigned long long +__ll_spu_cntlz(vector unsigned long long x) +{ + vec_uint4 cnt; + + cnt = spu_cntlz((vec_uint4)x); + cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4))); + cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11})); + + return (vec_ullong2)cnt; +} + +static inline vector unsigned long long +__ll_spu_sl(vector unsigned long long x, vector unsigned long long count) +{ + vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; + vec_ullong2 x_upper, x_lower; + + // shift upper word + x_upper = spu_and(x, mask); + x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1)); + x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1)); + + // shift lower word + x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3)); + x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3)); + + return spu_sel(x_lower, x_upper, mask); +} + +static inline vector unsigned long long +__ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count) +{ + vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull}; + vec_ullong2 x_upper, x_lower; + vec_uint4 cnt_byte; + + cnt_byte = spu_add((vec_uint4)count, 7); + + // shift upper word + x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1)); + x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1)); + + // shift lower word + x_lower = spu_andc(x, mask); + x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3)); + x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3)); + + return spu_sel(x_lower, x_upper, mask); +} + +static inline vector unsigned long long +__ll_spu_cmpeq_zero(vector unsigned long long x) +{ + vec_uint4 cmp; + + cmp = spu_cmpeq((vec_uint4)x, 0); + return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}))); +} + +static inline vector unsigned long long +__ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y) +{ + vec_uint4 gt; + + gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y); + gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y)); + return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 
8,9,10,11, 8,9,10,11})); +} + +static inline vector unsigned long long +__ll_spu_sub(vector unsigned long long x, vector unsigned long long y) +{ + vec_uint4 borrow; + + borrow = spu_genb((vec_uint4)x, (vec_uint4)y); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); + return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow); +} + +#endif // ___SIMD_MATH_LLDIV_H___ + diff --git a/Extras/simdmathlibrary/spu/simdmath/_remainder.h b/Extras/simdmathlibrary/spu/simdmath/_remainder.h new file mode 100644 index 000000000..b4dcbf97c --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/_remainder.h @@ -0,0 +1,84 @@ +/* Internal helper routines shared by the vector double remainder functions. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH__REMAINDER_H___ +#define ___SIMD_MATH__REMAINDER_H___ + +#include + +/* + * double formatted x2 + */ +static inline vec_uint4 +__rem_twice_d(vec_uint4 aa) +{ + vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0 + norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // if denorm or zero: << 1; if norm: exp + 1 + return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2 +} + +/* + * subtraction function under limited conditions + */ +static inline vec_uint4 +__rem_sub_d(vec_uint4 aa, vec_uint4 bb) +{ + // which is bigger input aa or bb + vec_uint4 is_bigb = __vec_gt64(bb, aa); // bb > aa + + // need denorm calc ?
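 // When both operands have nonzero biased exponents, the exact hardware
 // double subtract selected at the end is used; otherwise the manual
 // borrow-propagating integer path preserves denormal results.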
+ vec_uint4 norm_a, norm_b; + norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); + norm_a = spu_and(norm_a, norm_b); + norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); + + // calc (aa - bb) and (bb - aa) + vec_uint4 res_a, res_b, res; + vec_uint4 borrow_a, borrow_b; + vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192}); + borrow_a = spu_genb(aa, bb); + borrow_b = spu_genb(bb, aa); + borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b); + borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b); + res_a = spu_subx(aa, bb, borrow_a); + res_b = spu_subx(bb, aa, borrow_b); + res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign + + res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa) + // select normal calc or special + res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a); + + return res; +} + +#endif + diff --git a/Extras/simdmathlibrary/spu/simdmath/_vec_utils.h b/Extras/simdmathlibrary/spu/simdmath/_vec_utils.h new file mode 100644 index 000000000..037eeec3d --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/_vec_utils.h @@ -0,0 +1,57 @@ +/* Common types for SPU SIMD Math Library + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___SIMD_MATH__VEC_UTILS_H___ +#define ___SIMD_MATH__VEC_UTILS_H___ + +/* + * extend spu_cmpgt function to 64bit data + */ +static inline vec_uint4 +__vec_gt64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb + vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb + return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right +} +static inline vec_uint4 +__vec_gt64(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 gt_hi = __vec_gt64_half(aa, bb); // only higher is right + return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11})); +} + +static inline vec_uint4 +__vec_eq64_half(vec_uint4 aa, vec_uint4 bb) +{ + vec_uint4 eq = spu_cmpeq(aa, bb); + return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}))); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/absi4.h b/Extras/simdmathlibrary/spu/simdmath/absi4.h new file mode 100644 index 000000000..813041c6f --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/absi4.h @@ -0,0 +1,44 @@ +/* absi4 - for each of four integer slots, compute absolute value. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ABSI4_H___ +#define ___SIMD_MATH_ABSI4_H___ + +#include +#include + +static inline vector signed int +_absi4 (vector signed int x) +{ + vec_int4 neg; + neg = spu_sub( 0, x ); + return spu_sel( neg, x, spu_cmpgt( x, -1 ) ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/acosf4.c b/Extras/simdmathlibrary/spu/simdmath/acosf4.h similarity index 56% rename from Extras/simdmathlibrary/spu/acosf4.c rename to Extras/simdmathlibrary/spu/simdmath/acosf4.h index 296bf86a8..c492d53cb 100644 --- a/Extras/simdmathlibrary/spu/acosf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/acosf4.h @@ -27,52 +27,56 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifndef ___SIMD_MATH_ACOSF4_H___ +#define ___SIMD_MATH_ACOSF4_H___ #include #include +#include + // // Computes the inverse cosine of all four slots of x // -vector float -acosf4 (vector float x) +static inline vector float +_acosf4 (vector float x) { - vec_float4 result, xabs; - vec_float4 t1; - vec_float4 xabs2, xabs4; - vec_float4 hi, lo; - vec_float4 neg, pos; - vec_uint4 select; + vec_float4 result, xabs; + vec_float4 t1; + vec_float4 xabs2, xabs4; + vec_float4 hi, lo; + vec_float4 neg, pos; + vec_uint4 select; - xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1)); - select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31)); + xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1)); + select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31)); - t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs)); + t1 = _sqrtf4(spu_sub( spu_splats(1.0f), xabs)); - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves (hi amd lo). - */ - xabs2 = spu_mul(xabs, xabs); - xabs4 = spu_mul(xabs2, xabs2); - hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f)); - hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f)); - hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f)); - lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f)); - lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f)); - lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f)); + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polynomial is evaluated + * in two halves (hi and lo). + */ + xabs2 = spu_mul(xabs, xabs); + xabs4 = spu_mul(xabs2, xabs2); + hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f)); + hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f)); + hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f)); + lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f)); + lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f)); + lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f)); - result = spu_madd(hi, xabs4, lo); + result = spu_madd(hi, xabs4, lo); - /* Adjust the result if x is negactive. - */ - neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f)); - pos = spu_mul(t1, result); + /* Adjust the result if x is negative. + */ + neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f)); + pos = spu_mul(t1, result); - result = spu_sel(pos, neg, select); + result = spu_sel(pos, neg, select); - return result; + return result; } - +#endif diff --git a/Extras/simdmathlibrary/spu/asinf4.c b/Extras/simdmathlibrary/spu/simdmath/asinf4.h similarity index 55% rename from Extras/simdmathlibrary/spu/asinf4.c rename to Extras/simdmathlibrary/spu/simdmath/asinf4.h index 460abdb53..eb6273121 100644 --- a/Extras/simdmathlibrary/spu/asinf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/asinf4.h @@ -27,59 +27,66 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_ASINF4_H___
+#define ___SIMD_MATH_ASINF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-asinf4 (vector float x)
+#include <simdmath/sqrtf4.h>
+#include <simdmath/divf4.h>
+
+static inline vector float
+_asinf4 (vector float x)
 {
-  // positive = (x > 0)
-  //
-  vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
+  // positive = (x > 0)
+  //
+  vec_uint4 positive = spu_cmpgt(x,spu_splats(0.0f));
 
-  // gtHalf = (|x| > 0.5)
-  //
-  vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
+  // gtHalf = (|x| > 0.5)
+  //
+  vec_uint4 gtHalf = spu_cmpabsgt(x,spu_splats(0.5f));
 
-  // x = absf(x)
-  //
-  x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
+  // x = absf(x)
+  //
+  x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
 
-  // if (x > 0.5)
-  //    g = 0.5 - 0.5*x
-  //    x = -2 * sqrtf(g)
-  // else
-  //    g = x * x
-  //
-  vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
+  // if (x > 0.5)
+  //    g = 0.5 - 0.5*x
+  //    x = -2 * sqrtf(g)
+  // else
+  //    g = x * x
+  //
+  vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
 
-  x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
+  x = spu_sel(x,spu_mul(spu_splats(-2.0f),_sqrtf4(g)),gtHalf);
 
-  // Compute the polynomials and take their ratio
-  //  denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
-  //  num = x * g * (-0.504400557f * g + 0.933933258f)
-  //
-  vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
-  vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
-  denom = spu_madd(denom,g,spu_splats(5.603603363f));
-  num = spu_mul(spu_mul(x,g),num);
+  // Compute the polynomials and take their ratio
+  //  denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
+  //  num = x * g * (-0.504400557f * g + 0.933933258f)
+  //
+  vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
+  vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
+  denom = spu_madd(denom,g,spu_splats(5.603603363f));
+  num = spu_mul(spu_mul(x,g),num);
 
-  // x = x + num / denom
-  //
-  x = spu_add(x,divf4(num,denom));
+  // x = x + num / denom
+  //
+  x = spu_add(x,_divf4(num,denom));
 
-  // if (x > 0.5)
-  //    x = x + M_PI_2
-  //
-  x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
+  // if (x > 0.5)
+  //    x = x + M_PI_2
+  //
+  x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
 
-  // if (!positive) x = -x
-  //
-  x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
+  // if (!positive) x = -x
+  //
+  x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
 
-  return x;
+  return x;
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/atan2f4.c b/Extras/simdmathlibrary/spu/simdmath/atan2f4.h
similarity index 71%
rename from Extras/simdmathlibrary/spu/atan2f4.c
rename to Extras/simdmathlibrary/spu/simdmath/atan2f4.h
index da0eef280..d17993ee2 100644
--- a/Extras/simdmathlibrary/spu/atan2f4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/atan2f4.h
@@ -27,34 +27,40 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_ATAN2F4_H___
+#define ___SIMD_MATH_ATAN2F4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
+#include <simdmath/atanf4.h>
+#include <simdmath/divf4.h>
+
 //
 // Inverse tangent function of two variables
 //
-vector float
-atan2f4 (vector float y, vector float x)
+static inline vector float
+_atan2f4 (vector float y, vector float x)
 {
-  vec_float4 res = atanf4(divf4(y,x));
+  vec_float4 res = _atanf4(_divf4(y,x));
 
-  // Use the arguments to determine the quadrant of the result:
-  // if (x < 0)
-  //    if (y < 0)
-  //       res = -PI + res
-  //    else
-  //       res = PI + res
-  //
-  vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
-  vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
+  // Use the arguments to determine the quadrant of the result:
+  // if (x < 0)
+  //    if (y < 0)
+  //       res = -PI + res
+  //    else
+  //       res = PI + res
+  //
+  vec_uint4 yNeg = spu_cmpgt(spu_splats(0.0f),y);
+  vec_uint4 xNeg = spu_cmpgt(spu_splats(0.0f),x);
 
-  vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
+  vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
 
-  vec_float4 newRes = spu_add(bias, res);
+  vec_float4 newRes = spu_add(bias, res);
 
-  res = spu_sel(res,newRes,xNeg);
+  res = spu_sel(res,newRes,xNeg);
 
-  return res;
+  return res;
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/atanf4.c b/Extras/simdmathlibrary/spu/simdmath/atanf4.h
similarity index 58%
rename from Extras/simdmathlibrary/spu/atanf4.c
rename to Extras/simdmathlibrary/spu/simdmath/atanf4.h
index 53bf3b415..49c8bee45 100644
--- a/Extras/simdmathlibrary/spu/atanf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/atanf4.h
@@ -27,50 +27,55 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_ATANF4_H___
+#define ___SIMD_MATH_ATANF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
+#include <simdmath/recipf4.h>
+
 //
 // Computes the inverse tangent of all four slots of x.
 //
-vector float
-atanf4 (vector float x)
+static inline vector float
+_atanf4 (vector float x)
 {
-  vec_float4 bias;
-  vec_float4 x2, x3, x4, x8, x9;
-  vec_float4 hi, lo;
-  vec_float4 result;
-  vec_float4 inv_x;
-  vec_uint4 sign;
-  vec_uint4 select;
+  vec_float4 bias;
+  vec_float4 x2, x3, x4, x8, x9;
+  vec_float4 hi, lo;
+  vec_float4 result;
+  vec_float4 inv_x;
+  vec_uint4 sign;
+  vec_uint4 select;
 
-  sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
-  inv_x = recipf4(x);
-  inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
+  sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
+  inv_x = _recipf4(x);
+  inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
 
-  select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
-  bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
-  bias = (vec_float4)spu_and((vec_uint4)bias, select);
+  select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
+  bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
+  bias = (vec_float4)spu_and((vec_uint4)bias, select);
 
-  x = spu_sel(x, inv_x, select);
+  x = spu_sel(x, inv_x, select);
 
-  bias = spu_add(bias, x);
-  x2 = spu_mul(x, x);
-  x3 = spu_mul(x2, x);
-  x4 = spu_mul(x2, x2);
-  x8 = spu_mul(x4, x4);
-  x9 = spu_mul(x8, x);
-  hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
-  hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
-  hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
-  hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
-  lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
-  lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
-  lo = spu_madd(lo, x3, bias);
+  bias = spu_add(bias, x);
+  x2 = spu_mul(x, x);
+  x3 = spu_mul(x2, x);
+  x4 = spu_mul(x2, x2);
+  x8 = spu_mul(x4, x4);
+  x9 = spu_mul(x8, x);
+  hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
+  hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
+  hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
+  hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
+  lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
+  lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
+  lo = spu_madd(lo, x3, bias);
 
-  result = spu_madd(hi, x9, lo);
+  result = spu_madd(hi, x9, lo);
 
-  return result;
+  return result;
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/cbrtf4.c b/Extras/simdmathlibrary/spu/simdmath/cbrtf4.h
similarity index 57%
rename from Extras/simdmathlibrary/spu/cbrtf4.c
rename to Extras/simdmathlibrary/spu/simdmath/cbrtf4.h
index 95d18a968..c44aa2f3d 100644
--- a/Extras/simdmathlibrary/spu/cbrtf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/cbrtf4.h
@@ -27,79 +27,69 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_CBRTF4_H___
+#define ___SIMD_MATH_CBRTF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
+#include <simdmath/divf4.h>
+#include <simdmath/frexpf4.h>
+#include <simdmath/ldexpf4.h>
+
-#define __calcQuot(xexp) n = xexp; \
-   vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
-   n = spu_sel(n, spu_add(n,2), negxexpmask); \
-   \
-   quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
-   quot = spu_add(quot, spu_rlmaska(quot, -4)); \
-   quot = spu_add(quot, spu_rlmaska(quot, -8)); \
-   quot = spu_add(quot, spu_rlmaska(quot,-16)); \
-   vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
-   quot = spu_add( \
-      quot, \
-      spu_rlmaska( \
-         spu_add( \
-            spu_add(r,5), \
-            spu_sl (r,2) \
-         ), \
-         -4 \
-      ) \
-   ); \
+static inline vec_int4
+__cbrtf4_calc_quot(vec_int4 n)
+{
+  vec_int4 quot;
+  vec_uint4 negxexpmask = spu_cmpgt(spu_splats(0), n);
+  n = spu_sel(n, spu_add(n,2), negxexpmask);
 
-#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
-#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
+  quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4));
+  quot = spu_add(quot, spu_rlmaska(quot, -4));
+  quot = spu_add(quot, spu_rlmaska(quot, -8));
+  quot = spu_add(quot, spu_rlmaska(quot,-16));
+  vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1));
+  quot = spu_add(quot, spu_rlmaska(spu_add(spu_add(r,5), spu_sl (r,2)), -4));
+  return quot;
+}
 
-vector float
-cbrtf4 (vector float x)
+#define __CBRTF_cbrt2     1.2599210498948731648 // 2^(1/3)
+#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
+
+static inline vector float
+_cbrtf4 (vector float x)
 {
   vec_float4 zeros = spu_splats(0.0f);
-  vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
-  vec_int4 xexp, n;
+  vec_uint4 zeromask = spu_cmpeq(x, zeros);
+  vec_int4 xexp;
   vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
-  vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
+  vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
   x = spu_and(x, sgnmask);
 
-  x = frexpf4(x, &xexp);
+  x = _frexpf4(x, &xexp);
   vec_float4 p = spu_madd(
     spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
     x, spu_splats(0.492659620528969547f) );
   vec_float4 p3 = spu_mul(p, spu_mul(p, p));
 
-  vec_int4 quot;
-  __calcQuot(xexp);
+  vec_int4 quot = __cbrtf4_calc_quot(xexp);
   vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
-  vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
-  factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
+  vec_float4 factor = spu_splats((float)(1.0/__CBRTF_sqr_cbrt2));
+  factor = spu_sel(factor, spu_splats((float)(1.0/__CBRTF_cbrt2)), spu_cmpeq(modval,-1));
   factor = spu_sel(factor, spu_splats((float)(             1.0)), spu_cmpeq(modval, 0));
-  factor = spu_sel(factor, spu_splats((float)(  _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
-  factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
+  factor = spu_sel(factor, spu_splats((float)(   __CBRTF_cbrt2)), spu_cmpeq(modval, 1));
+  factor = spu_sel(factor, spu_splats((float)(__CBRTF_sqr_cbrt2)), spu_cmpeq(modval, 2));
 
   vec_float4 pre  = spu_mul(p, factor);
   vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
   vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
-  vec_float4 res = spu_mul(pre, divf4(numr, denr));
-  res = ldexpf4(res, quot);
+  vec_float4 res = spu_mul(pre, _divf4(numr, denr));
+  res = _ldexpf4(res, quot);
 
   return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask), zeros, zeromask);
 }
 
-/*
-_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
-{
-  vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
-  vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
-  vec_float4 absx = spu_andc(x, sbit);
-  vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
-  res = spu_sel(res, spu_or(sbit, res), neg);
-  return res;
-}
-*/
+#endif
diff --git a/Extras/simdmathlibrary/spu/ceild2.c b/Extras/simdmathlibrary/spu/simdmath/ceild2.h
similarity index 96%
rename from Extras/simdmathlibrary/spu/ceild2.c
rename to Extras/simdmathlibrary/spu/simdmath/ceild2.h
index 8bbad0ea7..a2d7d3f9b 100644
--- a/Extras/simdmathlibrary/spu/ceild2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/ceild2.h
@@ -27,11 +27,14 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_CEILD2_H___
+#define ___SIMD_MATH_CEILD2_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector double
-ceild2(vector double in)
+static inline vector double
+_ceild2(vector double in)
 {
   vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
   vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -83,7 +86,7 @@ ceild2(vector double in)
   insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
 
   /* replace insert
-  */
+   */
   in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
 
   /* in + addend
@@ -92,3 +95,5 @@ ceild2(vector double in)
 
   return (out);
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/ceilf4.c b/Extras/simdmathlibrary/spu/simdmath/ceilf4.h
similarity index 74%
rename from Extras/simdmathlibrary/spu/ceilf4.c
rename to Extras/simdmathlibrary/spu/simdmath/ceilf4.h
index 90bc1817d..06468a2d8 100644
--- a/Extras/simdmathlibrary/spu/ceilf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/ceilf4.h
@@ -27,28 +27,32 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_CEILF4_H___
+#define ___SIMD_MATH_CEILF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-ceilf4 (vector float x)
+static inline vector float
+_ceilf4 (vector float x)
 {
-  vec_int4 xi, xi1;
-  vec_uint4 inrange;
-  vec_float4 truncated, truncated1;
+  vec_int4 xi, xi1;
+  vec_uint4 inrange;
+  vec_float4 truncated, truncated1;
 
-  // Find truncated value and one greater.
+  // Find truncated value and one greater.
 
-  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
+  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
 
-  xi = spu_convts( x, 0 );
-  xi1 = spu_add( xi, 1 );
+  xi = spu_convts( x, 0 );
+  xi1 = spu_add( xi, 1 );
 
-  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
-  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
+  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
+  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
 
-  // If truncated value is less than input, add one.
+  // If truncated value is less than input, add one.
 
-  return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
+  return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/copysignd2.h b/Extras/simdmathlibrary/spu/simdmath/copysignd2.h
new file mode 100644
index 000000000..be4137ad4
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/copysignd2.h
@@ -0,0 +1,43 @@
+/* copysignd2 - for each of two double slots, return value with magnitude from x and sign from y.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_COPYSIGND2_H___
+#define ___SIMD_MATH_COPYSIGND2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+
+static inline vector double
+_copysignd2 (vector double x, vector double y)
+{
+  return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/ppu/copysignf4.c b/Extras/simdmathlibrary/spu/simdmath/copysignf4.h
similarity index 88%
rename from Extras/simdmathlibrary/ppu/copysignf4.c
rename to Extras/simdmathlibrary/spu/simdmath/copysignf4.h
index 458b34211..1871a4bbb 100644
--- a/Extras/simdmathlibrary/ppu/copysignf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/copysignf4.h
@@ -27,15 +27,17 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_COPYSIGNF4_H___
+#define ___SIMD_MATH_COPYSIGNF4_H___
+
 #include <simdmath.h>
-#include <altivec.h>
-
-#include "common-types.h"
+#include <spu_intrinsics.h>
 
-vector float
-copysignf4 (vector float x, vector float y)
+static inline vector float
+_copysignf4 (vector float x, vector float y)
 {
-  return vec_sel( x, y, vec_splatsu4(0x80000000) );
+  return spu_sel( x, y, spu_splats(0x80000000) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/cosd2.h b/Extras/simdmathlibrary/spu/simdmath/cosd2.h
new file mode 100644
index 000000000..5fd6bf0df
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/cosd2.h
@@ -0,0 +1,46 @@
+/* cosd2 - Computes the cosine of each of two double slots.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_COSD2_H___
+#define ___SIMD_MATH_COSD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/sincosd2.h>
+
+static inline vector double
+_cosd2 (vector double x)
+{
+  vec_double2 s, c;
+  _sincosd2(x, &s, &c);
+  return c;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/cosf4.h b/Extras/simdmathlibrary/spu/simdmath/cosf4.h
new file mode 100644
index 000000000..c3d2be5bf
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/cosf4.h
@@ -0,0 +1,46 @@
+/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_COSF4_H___
+#define ___SIMD_MATH_COSF4_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/sincosf4.h>
+
+static inline vector float
+_cosf4 (vector float x)
+{
+  vec_float4 s, c;
+  _sincosf4(x, &s, &c);
+  return c;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/divd2.h b/Extras/simdmathlibrary/spu/simdmath/divd2.h
new file mode 100644
index 000000000..35cfe6e5d
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/divd2.h
@@ -0,0 +1,47 @@
+/* divd2 - for each of two double slots, divide numer by denom.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_DIVD2_H___
+#define ___SIMD_MATH_DIVD2_H___
+
+// Equal to numer * recipd2(denom)
+// See recipd2 for results of special values.
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/recipd2.h>
+
+static inline vector double
+_divd2 (vector double numer, vector double denom)
+{
+  return spu_mul( numer, _recipd2( denom ) );
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/divf4.c b/Extras/simdmathlibrary/spu/simdmath/divf4.h
similarity index 78%
rename from Extras/simdmathlibrary/spu/divf4.c
rename to Extras/simdmathlibrary/spu/simdmath/divf4.h
index 670b03f80..6edfab1c8 100644
--- a/Extras/simdmathlibrary/spu/divf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/divf4.h
@@ -27,20 +27,24 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_DIVF4_H___
+#define ___SIMD_MATH_DIVF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-divf4 (vector float numer, vector float denom)
+static inline vector float
+_divf4 (vector float numer, vector float denom)
 {
-  // Reciprocal estimate and 1 Newton-Raphson iteration.
-  // Uses constant of 1.0 + 1 ulp to improve accuracy.
+  // Reciprocal estimate and 1 Newton-Raphson iteration.
+  // Uses constant of 1.0 + 1 ulp to improve accuracy.
-  vector float y0, y0numer;
-  vector float oneish = (vector float)spu_splats(0x3f800001);
+  vector float y0, y0numer;
+  vector float oneish = (vector float)spu_splats(0x3f800001);
 
-  y0 = spu_re( denom );
-  y0numer = spu_mul( numer, y0 );
-  return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
+  y0 = spu_re( denom );
+  y0numer = spu_mul( numer, y0 );
+  return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/divi4.h b/Extras/simdmathlibrary/spu/simdmath/divi4.h
new file mode 100644
index 000000000..f4f7e9668
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/divi4.h
@@ -0,0 +1,67 @@
+/* divi4 -
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_DIVI4_H___
+#define ___SIMD_MATH_DIVI4_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/divu4.h>
+
+// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
+// and store in divi4_t struct.  Divide by zero produces quotient = 0, remainder = numerator.
+
+static inline divi4_t
+_divi4 (vector signed int numer, vector signed int denom)
+{
+  divu4_t resAbs;
+  divi4_t res;
+  vec_uint4 numerPos, denomPos, quotNeg;
+  vec_uint4 numerAbs, denomAbs;
+
+  // Determine whether result needs sign change
+
+  numerPos = spu_cmpgt( numer, -1 );
+  denomPos = spu_cmpgt( denom, -1 );
+  quotNeg = spu_xor( numerPos, denomPos );
+
+  // Use absolute values of numerator, denominator
+
+  numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
+  denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
+
+  resAbs = _divu4(numerAbs, denomAbs);
+
+  res.quot = spu_sel( (vec_int4)resAbs.quot, spu_sub( 0, (vec_int4)resAbs.quot ), quotNeg );
+  res.rem = spu_sel( spu_sub( 0, (vec_int4)resAbs.rem ), (vec_int4)resAbs.rem, numerPos );
+  return res;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/divu4.c b/Extras/simdmathlibrary/spu/simdmath/divu4.h
similarity index 71%
rename from Extras/simdmathlibrary/spu/divu4.c
rename to Extras/simdmathlibrary/spu/simdmath/divu4.h
index f6d5342b4..f28c09dc6 100644
--- a/Extras/simdmathlibrary/spu/divu4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/divu4.h
@@ -27,44 +27,48 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_DIVU4_H___
+#define ___SIMD_MATH_DIVU4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
 // divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
 // and store in divu4_t struct.  Divide by zero produces quotient = 0, remainder = numerator.
 
-divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
+static inline divu4_t
+_divu4 (vector unsigned int numer, vector unsigned int denom)
 {
-  divu4_t res;
-  vec_int4 shift;
-  vec_uint4 quot, newQuot;
-  vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
-  vec_uint4 newNum, skip, cont;
-  int anyCont;
+  divu4_t res;
+  vec_int4 shift;
+  vec_uint4 quot, newQuot;
+  vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
+  vec_uint4 newNum, skip, cont;
+  int anyCont;
 
-  // Get difference of leading zeros.
-  // Any possible negative value will be interpreted as a shift > 31
+  // Get difference of leading zeros.
+  // Any possible negative value will be interpreted as a shift > 31
 
-  denomZeros = spu_cntlz( denom );
-  numerZeros = spu_cntlz( numer );
+  denomZeros = spu_cntlz( denom );
+  numerZeros = spu_cntlz( numer );
 
-  shift = (vec_int4)spu_sub( denomZeros, numerZeros );
+  shift = (vec_int4)spu_sub( denomZeros, numerZeros );
 
-  // Shift denom to align leading one with numerator's
+  // Shift denom to align leading one with numerator's
 
-  denomShifted = spu_sl( denom, (vec_uint4)shift );
-  oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
-  oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
+  denomShifted = spu_sl( denom, (vec_uint4)shift );
+  oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
+  oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
 
-  // Shift left all leading zeros.
+  // Shift left all leading zeros.
-  denomLeft = spu_sl( denom, denomZeros );
-  oneLeft = spu_sl( spu_splats(1U), denomZeros );
+  denomLeft = spu_sl( denom, denomZeros );
+  oneLeft = spu_sl( spu_splats(1U), denomZeros );
 
-  quot = spu_splats(0U);
+  quot = spu_splats(0U);
 
-  do
-  {
+  do
+  {
     cont = spu_cmpgt( oneShifted, 0U );
     anyCont = spu_extract( spu_gather( cont ), 0 );
@@ -87,11 +91,12 @@ divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
     quot = spu_sel( newQuot, quot, skip );
     numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
-  }
-  while ( anyCont );
+  }
+  while ( anyCont );
 
-  res.quot = quot;
-  res.rem = numer;
-  return res;
+  res.quot = quot;
+  res.rem = numer;
+  return res;
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/exp2f4.c b/Extras/simdmathlibrary/spu/simdmath/exp2f4.h
similarity index 92%
rename from Extras/simdmathlibrary/spu/exp2f4.c
rename to Extras/simdmathlibrary/spu/simdmath/exp2f4.h
index 88354bfd1..4e8a9ab7e 100644
--- a/Extras/simdmathlibrary/spu/exp2f4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/exp2f4.h
@@ -27,6 +27,8 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_EXP2F4_H___
+#define ___SIMD_MATH_EXP2F4_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
@@ -72,10 +74,10 @@
  */
 
-#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
+#define __EXP2F_LN2 0.69314718055995f /* ln(2) */
 
-vector float
-exp2f4 (vector float x)
+static inline vector float
+_exp2f4 (vector float x)
 {
   vec_int4 ix;
   vec_uint4 overflow, underflow;
@@ -91,7 +93,7 @@ exp2f4 (vector float x)
   bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
   ix = spu_convts(spu_add(x, bias), 0);
   frac = spu_sub(spu_convtf(ix, 0), x);
-  frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
+  frac = spu_mul(frac, spu_splats(__EXP2F_LN2));
 
   // !!! HRD Changing weird un-understandable and incorrect overflow handling code
   //overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
   underflow = spu_cmpgt(spu_splats(-126.0f), x);
 
   //exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
-  //                                                         !!! overflow (x >= 127.999999f)
+  //                                                         !!! overflow (x >= 127.999999f)
   exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
   exp_int = spu_add(exp_int, exp_int);                  // !!! HRD
@@ -123,9 +125,11 @@ exp2f4 (vector float x)
   result = spu_mul(exp_frac, exp_int);
 
   /* Handle overflow */
-  result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
-  result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
+  result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), overflow);
+  result = spu_sel(result, (vec_float4)spu_splats(0), underflow);
   //result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
 
   return (result);
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/expf4.c b/Extras/simdmathlibrary/spu/simdmath/expf4.h
similarity index 68%
rename from Extras/simdmathlibrary/spu/expf4.c
rename to Extras/simdmathlibrary/spu/simdmath/expf4.h
index e5ca4ec7d..d870d8243 100644
--- a/Extras/simdmathlibrary/spu/expf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/expf4.h
@@ -27,37 +27,44 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_EXPF4_H___
+#define ___SIMD_MATH_EXPF4_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-#define _EXPF_H_C1     ((float)-0.6931470632553101f)
-#define _EXPF_H_C2     ((float)-1.1730463525082e-7f)
+#include <simdmath/divf4.h>
+#include <simdmath/ldexpf4.h>
 
-#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
+#define __EXPF_C1     -0.6931470632553101f
+#define __EXPF_C2     -1.1730463525082e-7f
 
-vector float
-expf4 (vector float x)
+#define __EXPF_INVLN2  1.4426950408889634f
+
+static inline vector float
+_expf4 (vector float x)
 {
-  vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
-  vec_float4 goffset  = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
-  vec_float4 g  = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
+  vec_uint4 xnegmask = spu_cmpgt(spu_splats(0.0f), x);
+  vec_float4 goffset  = spu_sel(spu_splats(0.5f),spu_splats(-0.5f),xnegmask);
+  vec_float4 g  = spu_mul(x, spu_splats(__EXPF_INVLN2));
   vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
 
   g = spu_convtf(xexp, 0);
-  g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
+  g = spu_madd(g, spu_splats(__EXPF_C2), spu_madd(g, spu_splats(__EXPF_C1), x));
 
   vec_float4 z  = spu_mul(g, g);
-  vec_float4 a  = spu_mul(z, spu_splats((float)0.0999748594f));
+  vec_float4 a  = spu_mul(z, spu_splats(0.0999748594f));
   vec_float4 b  = spu_mul(g,
                           spu_madd(z,
-                                   spu_splats((float)0.0083208258f),
-                                   spu_splats((float)0.4999999992f)
+                                   spu_splats(0.0083208258f),
+                                   spu_splats(0.4999999992f)
                                    )
                           );
 
-  vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
-                         spu_add(spu_splats(1.0f), spu_sub(a, b)));
+  vec_float4 foo = _divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
+                          spu_add(spu_splats(1.0f), spu_sub(a, b)));
 
-  return ldexpf4(foo, xexp);
+  return _ldexpf4(foo, xexp);
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/expm1f4.c b/Extras/simdmathlibrary/spu/simdmath/expm1f4.h
similarity index 69%
rename from Extras/simdmathlibrary/spu/expm1f4.c
rename to Extras/simdmathlibrary/spu/simdmath/expm1f4.h
index b2dde1419..78e63abfd 100644
--- a/Extras/simdmathlibrary/spu/expm1f4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/expm1f4.h
@@ -27,28 +27,36 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_EXPM1F4_H___
+#define ___SIMD_MATH_EXPM1F4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
-#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
+#include <simdmath/expf4.h>
+#include <simdmath/divf4.h>
 
-vector float
-expm1f4 (vector float x)
+#define __EXPM1F_ln1by2 -0.6931471805599f
+#define __EXPM1F_ln3by2  0.4054651081082f
+
+static inline vector float
+_expm1f4 (vector float x)
 {
-  vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
-                                                  spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
+  vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(__EXPM1F_ln1by2)),
+                                   spu_cmpgt(spu_splats(__EXPM1F_ln3by2), x));
   vec_float4 x2 = spu_mul(x,x);
   vec_float4 d0, d1, n0, n1;
 
-  d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
-  d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
+  d0 = spu_madd(x , spu_splats(-0.3203561199f), spu_splats(0.9483177697f));
+  d1 = spu_madd(x2, spu_splats(0.0326527809f), d0);
 
-  n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
-  n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
+  n0 = spu_madd(x , spu_splats(0.1538026623f), spu_splats(0.9483177732f));
+  n1 = spu_madd(x , spu_splats(0.0024490478f), spu_splats(0.0305274668f));
   n1 = spu_madd(x2, n1, n0);
 
-  return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
-                 spu_mul(x, divf4(n1, d1)),
+  return spu_sel(spu_sub(_expf4(x), spu_splats(1.0f)),
+                 spu_mul(x, _divf4(n1, d1)),
                  nearzeromask);
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/fabsd2.h b/Extras/simdmathlibrary/spu/simdmath/fabsd2.h
new file mode 100644
index 000000000..3ba4e39ea
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/fabsd2.h
@@ -0,0 +1,42 @@
+/* fabsd2 - for each of two double slots, compute absolute value.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_FABSD2_H___
+#define ___SIMD_MATH_FABSD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+static inline vector double
+_fabsd2 (vector double x)
+{
+  return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/fabsf4.c b/Extras/simdmathlibrary/spu/simdmath/fabsf4.h
similarity index 89%
rename from Extras/simdmathlibrary/spu/fabsf4.c
rename to Extras/simdmathlibrary/spu/simdmath/fabsf4.h
index 4086b12d8..bad63e5b1 100644
--- a/Extras/simdmathlibrary/spu/fabsf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fabsf4.h
@@ -27,11 +27,16 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FABSF4_H___
+#define ___SIMD_MATH_FABSF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float fabsf4 (vector float x)
+static inline vector float
+_fabsf4 (vector float x)
 {
-  return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
+  return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/fdimd2.h b/Extras/simdmathlibrary/spu/simdmath/fdimd2.h
new file mode 100644
index 000000000..5cbb1a3ff
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/fdimd2.h
@@ -0,0 +1,51 @@
+/* fdimd2
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_FDIMD2_H___
+#define ___SIMD_MATH_FDIMD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+/* fdimd2 - compute the positive difference of x and y.
+ */
+static inline vector double
+_fdimd2 (vector double x, vector double y)
+{
+  vec_double2 v;
+  vec_uint4 mask;
+
+  v = spu_sub(x, y);
+  mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
+  v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31));
+
+  return (v);
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/fdimf4.c b/Extras/simdmathlibrary/spu/simdmath/fdimf4.h
similarity index 92%
rename from Extras/simdmathlibrary/spu/fdimf4.c
rename to Extras/simdmathlibrary/spu/simdmath/fdimf4.h
index 71446bd05..f81482f1c 100644
--- a/Extras/simdmathlibrary/spu/fdimf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fdimf4.h
@@ -27,12 +27,17 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FDIMF4_H___
+#define ___SIMD_MATH_FDIMF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-fdimf4 (vector float x, vector float y)
+static inline vector float
+_fdimf4 (vector float x, vector float y)
 {
   vec_float4 diff = spu_sub(x,y);
   return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/floord2.c b/Extras/simdmathlibrary/spu/simdmath/floord2.h
similarity index 96%
rename from Extras/simdmathlibrary/spu/floord2.c
rename to Extras/simdmathlibrary/spu/simdmath/floord2.h
index 648a84dd3..e2499883f 100644
--- a/Extras/simdmathlibrary/spu/floord2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/floord2.h
@@ -27,11 +27,14 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FLOORD2_H___
+#define ___SIMD_MATH_FLOORD2_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector double
-floord2(vector double in)
+static inline vector double
+_floord2(vector double in)
 {
   vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
   vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -74,7 +77,7 @@ floord2(vector double in)
   equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
   addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
 
-  /* insert
+   /* insert
    */
   e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
   e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
@@ -92,3 +95,5 @@ floord2(vector double in)
 
   return (out);
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/floorf4.c b/Extras/simdmathlibrary/spu/simdmath/floorf4.h
similarity index 74%
rename from Extras/simdmathlibrary/spu/floorf4.c
rename to Extras/simdmathlibrary/spu/simdmath/floorf4.h
index 03d018f3d..2e8f62508 100644
--- a/Extras/simdmathlibrary/spu/floorf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/floorf4.h
@@ -27,28 +27,32 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FLOORF4_H___
+#define ___SIMD_MATH_FLOORF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-floorf4 (vector float x)
+static inline vector float
+_floorf4 (vector float x)
 {
-  vec_int4 xi, xi1;
-  vec_uint4 inrange;
-  vec_float4 truncated, truncated1;
+  vec_int4 xi, xi1;
+  vec_uint4 inrange;
+  vec_float4 truncated, truncated1;
 
-  // Find truncated value and one less.
+  // Find truncated value and one less.
-  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
+  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
 
-  xi = spu_convts( x, 0 );
-  xi1 = spu_add( xi, -1 );
+  xi = spu_convts( x, 0 );
+  xi1 = spu_add( xi, -1 );
 
-  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
-  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
+  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
+  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
 
-  // If truncated value is greater than input, subtract one.
+  // If truncated value is greater than input, subtract one.
 
-  return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
+  return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/fmad2.h b/Extras/simdmathlibrary/spu/simdmath/fmad2.h
new file mode 100644
index 000000000..b1047cde8
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/fmad2.h
@@ -0,0 +1,42 @@
+/* fmad2
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_FMAD2_H___
+#define ___SIMD_MATH_FMAD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+static inline vector double
+_fmad2 (vector double x, vector double y, vector double z)
+{
+  return spu_madd(x,y,z);
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/negatef4.c b/Extras/simdmathlibrary/spu/simdmath/fmaf4.h
similarity index 89%
rename from Extras/simdmathlibrary/spu/negatef4.c
rename to Extras/simdmathlibrary/spu/simdmath/fmaf4.h
index f9d5b6cb4..7dd344f16 100644
--- a/Extras/simdmathlibrary/spu/negatef4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmaf4.h
@@ -1,4 +1,4 @@
-/* negatef4 - for each of four float slots, negate the sign bit.
+/* fmaf4
    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
    All rights reserved.
 
@@ -27,12 +27,16 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMAF4_H___
+#define ___SIMD_MATH_FMAF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-
-vector float negatef4 (vector float x)
+static inline vector float
+_fmaf4 (vector float x, vector float y, vector float z)
 {
-  return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) );
+  return spu_madd(x,y,z);
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/fmaxd2.c b/Extras/simdmathlibrary/spu/simdmath/fmaxd2.h
similarity index 95%
rename from Extras/simdmathlibrary/spu/fmaxd2.c
rename to Extras/simdmathlibrary/spu/simdmath/fmaxd2.h
index 0ffd4628d..944881fc9 100644
--- a/Extras/simdmathlibrary/spu/fmaxd2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmaxd2.h
@@ -27,6 +27,8 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMAXD2_H___
+#define ___SIMD_MATH_FMAXD2_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
@@ -36,8 +38,8 @@
  * is returned.
  */
 
-vector double
-fmaxd2 (vector double x, vector double y)
+static inline vector double
+_fmaxd2 (vector double x, vector double y)
 {
   vec_ullong2 selector, denorm;
   vec_double2 x_offset, y_offset, diff;
@@ -66,3 +68,4 @@ fmaxd2 (vector double x, vector double y)
 
   return spu_sel(x, y, selector);
 }
+#endif
diff --git a/Extras/simdmathlibrary/spu/fmaxf4.c b/Extras/simdmathlibrary/spu/simdmath/fmaxf4.h
similarity index 90%
rename from Extras/simdmathlibrary/spu/fmaxf4.c
rename to Extras/simdmathlibrary/spu/simdmath/fmaxf4.h
index 1d785125c..cdaecdc94 100644
--- a/Extras/simdmathlibrary/spu/fmaxf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmaxf4.h
@@ -27,14 +27,17 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMAXF4_H___
+#define ___SIMD_MATH_FMAXF4_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-fmaxf4 (vector float x, vector float y)
+static inline vector float
+_fmaxf4 (vector float x, vector float y)
 {
-  return spu_sel( x, y, spu_cmpgt( y, x ) );
+  return spu_sel( x, y, spu_cmpgt( y, x ) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/fmind2.c b/Extras/simdmathlibrary/spu/simdmath/fmind2.h
similarity index 95%
rename from Extras/simdmathlibrary/spu/fmind2.c
rename to Extras/simdmathlibrary/spu/simdmath/fmind2.h
index 47b0a060c..df641c04d 100644
--- a/Extras/simdmathlibrary/spu/fmind2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmind2.h
@@ -27,6 +27,9 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMIND2_H___
+#define ___SIMD_MATH_FMIND2_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
@@ -35,8 +38,8 @@
  * is returned.
  */
 
-vector double
-fmind2 (vector double x, vector double y)
+static inline vector double
+_fmind2 (vector double x, vector double y)
 {
   vec_ullong2 selector, denorm;
   vec_double2 x_offset, y_offset, diff;
@@ -65,3 +68,4 @@ fmind2 (vector double x, vector double y)
 
   return spu_sel(x, y, selector);
 }
+#endif
diff --git a/Extras/simdmathlibrary/spu/fminf4.c b/Extras/simdmathlibrary/spu/simdmath/fminf4.h
similarity index 90%
rename from Extras/simdmathlibrary/spu/fminf4.c
rename to Extras/simdmathlibrary/spu/simdmath/fminf4.h
index 7963b7f85..c422e6010 100644
--- a/Extras/simdmathlibrary/spu/fminf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fminf4.h
@@ -27,14 +27,17 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMINF4_H___
+#define ___SIMD_MATH_FMINF4_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-fminf4 (vector float x, vector float y)
+static inline vector float
+_fminf4 (vector float x, vector float y)
 {
-  return spu_sel( x, y, spu_cmpgt( x, y ) );
+  return spu_sel( x, y, spu_cmpgt( x, y ) );
 }
 
+#endif
diff --git a/Extras/simdmathlibrary/spu/fmodd2.c b/Extras/simdmathlibrary/spu/simdmath/fmodd2.h
similarity index 83%
rename from Extras/simdmathlibrary/spu/fmodd2.c
rename to Extras/simdmathlibrary/spu/simdmath/fmodd2.h
index 40f7e8125..cf7b71722 100644
--- a/Extras/simdmathlibrary/spu/fmodd2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmodd2.h
@@ -27,10 +27,14 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMODD2_H___
+#define ___SIMD_MATH_FMODD2_H___
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
+#include <simdmath/_vec_utils.h>
+
 /*
  * a vector is returned that contains the remainder of xi/yi,
  * for corresponding elements of vector double x and vector double y,
  * magnitude less than |yi|
 */
 
-static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
-static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
-static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);
-
-vector double fmodd2(vector double x, vector double y)
+static inline vector double
+_fmodd2(vector double x, vector double y)
 {
   int shift0, shift1;
   vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
@@ -82,20 +83,20 @@ vector double fmodd2(vector double x, vector double y)
   exp_y  = spu_rlmask(y_hi, -20);
 
   // y>x
-  resultx = _vec_gt64(abs_y, abs_x);
+  resultx = __vec_gt64(abs_y, abs_x);
 
   //is Inf, is Nan
   x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
-  x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
+  x_inf = __vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
   x_nan = spu_andc(x_7ff, x_inf);
 
   y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
-  y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
+  y_inf = __vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
   y_nan = spu_andc(y_7ff, y_inf);
 
   // is zero
-  zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
-  zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
+  zero_x = __vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
+  zero_y = __vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
 
   /* Determine ilogb of abs_x and abs_y and
@@ -121,8 +122,8 @@ vector double fmodd2(vector double x, vector double y)
   cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
 
   /*
-    mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
-    mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
+   mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
+   mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
   */
   //norm
   mant_x_norm = spu_or(implied_1, frac_x);
@@ -225,8 +226,8 @@ vector double fmodd2(vector double x, vector double y)
   shift0 = spu_extract(cnt, 0);
   shift1 = spu_extract(cnt, 2);
   /*
-    norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
-    norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
+   norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
+   norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
   */
   norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
   norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
@@ -236,11 +237,11 @@ vector double fmodd2(vector double x, vector double y)
   //denorm
   /*
-    shift = spu_add((vec_int4)exp_y, -1);
-    shift0 = spu_extract(shift, 0);
-    shift1 = spu_extract(shift, 2);
-    denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
-    denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
+   shift = spu_add((vec_int4)exp_y, -1);
+   shift0 = spu_extract(shift, 0);
+   shift1 = spu_extract(shift, 2);
+   denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
+   denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
   */
   shift = spu_add(power, -1);
   shift0 = spu_extract(shift, 0);
@@ -278,25 +279,4 @@ vector double fmodd2(vector double x, vector double y)
 
   return ((vec_double2)result);
 }
-
-/*
- * extend spu_cmpgt function to 64bit data
- */
-static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
-{
-  vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
-  vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
-  return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
-}
-static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
-{
-  vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
-  return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
-}
-
-static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
-{
-  vec_uint4 eq = spu_cmpeq(aa, bb);
-  return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
-}
-
+#endif
diff --git a/Extras/simdmathlibrary/spu/fmodf4.c b/Extras/simdmathlibrary/spu/simdmath/fmodf4.h
similarity index 54%
rename from Extras/simdmathlibrary/spu/fmodf4.c
rename to Extras/simdmathlibrary/spu/simdmath/fmodf4.h
index 7deae38fb..61b5473c8 100644
--- a/Extras/simdmathlibrary/spu/fmodf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/fmodf4.h
@@ -27,60 +27,68 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef ___SIMD_MATH_FMODF4_H___
+#define ___SIMD_MATH_FMODF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
+#include <simdmath/divf4.h>
+#include <simdmath/fabsf4.h>
+#include <simdmath/copysignf4.h>
+
 //
 // This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
 // If x == 0, the result is 0.
 // If x != 0 and y == 0, the result is undefined.
 
-vector float
-fmodf4 (vector float x, vector float y)
+static inline vector float
+_fmodf4 (vector float x, vector float y)
 {
-  vec_float4 q, xabs, yabs, qabs, xabs2;
-  vec_int4 qi0, qi1, qi2;
-  vec_float4 i0, i1, i2, r1, r2, i;
-  vec_uint4 inrange;
+  vec_float4 q, xabs, yabs, qabs, xabs2;
+  vec_int4 qi0, qi1, qi2;
+  vec_float4 i0, i1, i2, r1, r2, i;
+  vec_uint4 inrange;
 
-  // Find i = truncated_integer(|x/y|)
+  // Find i = truncated_integer(|x/y|)
 
-  // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
-  // Thus i is either the truncated quotient, one less, or one greater.
- q = divf4( x, y ); - xabs = fabsf4( x ); - yabs = fabsf4( y ); - qabs = fabsf4( q ); - xabs2 = spu_add( xabs, xabs ); + q = _divf4( x, y ); + xabs = _fabsf4( x ); + yabs = _fabsf4( y ); + qabs = _fabsf4( q ); + xabs2 = spu_add( xabs, xabs ); - inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); - inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); + inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); - qi1 = spu_convts( qabs, 0 ); - qi0 = spu_add( qi1, -1 ); - qi2 = spu_add( qi1, 1 ); + qi1 = spu_convts( qabs, 0 ); + qi0 = spu_add( qi1, -1 ); + qi2 = spu_add( qi1, 1 ); - i0 = spu_convtf( qi0, 0 ); - i1 = spu_convtf( qi1, 0 ); - i2 = spu_convtf( qi2, 0 ); + i0 = spu_convtf( qi0, 0 ); + i1 = spu_convtf( qi1, 0 ); + i2 = spu_convtf( qi2, 0 ); - // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as - // 2*|x| - i*|y| >= |x|: - // - // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. - // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed - // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. + // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as + // 2*|x| - i*|y| >= |x|: + // + // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. + // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed + // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. - r1 = spu_nmsub( i1, yabs, xabs2 ); - r2 = spu_nmsub( i2, yabs, xabs2 ); + r1 = spu_nmsub( i1, yabs, xabs2 ); + r2 = spu_nmsub( i2, yabs, xabs2 ); - i = i0; - i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) ); - i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) ); + i = i0; + i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) ); + i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) ); - i = copysignf4( i, q ); + i = _copysignf4( i, q ); - return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); + return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/fpclassifyd2.h b/Extras/simdmathlibrary/spu/simdmath/fpclassifyd2.h new file mode 100644 index 000000000..bbccbfc57 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/fpclassifyd2.h @@ -0,0 +1,83 @@ +/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
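Aside: a minimal usage sketch for the _fmodf4 inline above, assuming the renamed headers install under a simdmath/ include directory as the new paths suggest; the harness function and the lane values in the comment are illustrative and hand-computed.

    #include <spu_intrinsics.h>
    #include <simdmath/fmodf4.h>

    static void fmodf4_example(void)
    {
      vec_float4 x = (vec_float4){ 5.5f, -7.25f, 9.0f, 0.0f };
      vec_float4 y = spu_splats(2.0f);
      vec_float4 r = _fmodf4(x, y);   /* { 1.5f, -1.25f, 1.0f, 0.0f } */
      (void)r;                        /* the result keeps the sign of x */
    }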
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FPCLASSIFYD2_H___ +#define ___SIMD_MATH_FPCLASSIFYD2_H___ + +#include +#include +#include + +static inline vector signed long long +_fpclassifyd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 mask; + vec_llong2 classtype; + vec_uint4 cmpgt, cmpeq; + + //FP_NORMAL: normal unless nan, infinity, zero, or denorm + classtype = spu_splats((long long)FP_NORMAL); + + //FP_NAN: all-ones exponent and non-zero mantissa + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn ); + mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask ); + + //FP_INFINITE: all-ones exponent and zero mantissa + mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask ); + + //FP_ZERO: zero exponent and zero mantissa + cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask ); + + //FP_SUBNORMAL: zero exponent and non-zero mantissa + cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero ); + cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero ); + mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) ); + classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask ); + + return classtype; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/fpclassifyf4.c b/Extras/simdmathlibrary/spu/simdmath/fpclassifyf4.h similarity index 57% rename from Extras/simdmathlibrary/spu/fpclassifyf4.c rename to Extras/simdmathlibrary/spu/simdmath/fpclassifyf4.h index 0fdc8d08d..fc251f998 100644 --- a/Extras/simdmathlibrary/spu/fpclassifyf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/fpclassifyf4.h @@ -27,52 +27,41 @@ POSSIBILITY OF SUCH DAMAGE. 
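Aside: expected lane-by-lane behavior of the _fpclassifyd2 inline above, assuming <math.h> supplies the FP_* constants as its include list implies; 5.0e-324 is the smallest positive double denormal, so the two lanes exercise FP_NORMAL and FP_SUBNORMAL.

    #include <math.h>
    #include <spu_intrinsics.h>
    #include <simdmath/fpclassifyd2.h>

    static void fpclassifyd2_example(void)
    {
      vec_double2 v = (vec_double2){ 1.0, 5.0e-324 };
      vec_llong2  c = _fpclassifyd2(v);
      /* spu_extract(c, 0) == FP_NORMAL, spu_extract(c, 1) == FP_SUBNORMAL */
      (void)c;
    }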
*/ +#ifndef ___SIMD_MATH_FPCLASSIFYF4_H___ +#define ___SIMD_MATH_FPCLASSIFYF4_H___ + #include #include #include -#ifndef FP_NAN -#define FP_NAN (0) -#endif -#ifndef FP_INFINITE -#define FP_INFINITE (1) -#endif -#ifndef FP_ZERO -#define FP_ZERO (2) -#endif -#ifndef FP_SUBNORMAL -#define FP_SUBNORMAL (3) -#endif -#ifndef FP_NORMAL -#define FP_NORMAL (4) -#endif - -vector signed int -fpclassifyf4 (vector float x) +static inline vector signed int +_fpclassifyf4 (vector float x) { - vec_uint4 zero = spu_splats((unsigned int)0x00000000); + vec_uint4 zero = spu_splats((unsigned int)0x00000000); - vec_uint4 mask; - vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff); - vec_int4 classtype = (vec_int4)zero; + vec_uint4 mask; + vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff); + vec_int4 classtype = (vec_int4)zero; - //FP_NAN: NaN not supported on SPU, never return FP_NAN + //FP_NAN: NaN not supported on SPU, never return FP_NAN - //FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE + //FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE - //FP_ZERO: zero exponent and zero mantissa - mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero ); - classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask ); - unclassified = spu_andc( unclassified, mask ); + //FP_ZERO: zero exponent and zero mantissa + mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero ); + classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask ); + unclassified = spu_andc( unclassified, mask ); - //FP_SUBNORMAL: zero exponent and non-zero mantissa - mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ), - spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) ); - classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask ); - unclassified = spu_andc( unclassified, mask ); + //FP_SUBNORMAL: zero exponent and non-zero mantissa + mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ), + spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) ); + classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask ); + unclassified = spu_andc( unclassified, mask ); - //FP_NORMAL: none of the above - classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified ); + //FP_NORMAL: none of the above + classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified ); - return classtype; + return classtype; } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/frexpd2.h b/Extras/simdmathlibrary/spu/simdmath/frexpd2.h new file mode 100644 index 000000000..956412953 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/frexpd2.h @@ -0,0 +1,98 @@ +/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x' + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_FREXPD2_H___ +#define ___SIMD_MATH_FREXPD2_H___ + +#include +#include +#include + +#define __FREXPD_DBL_NAN 0x7FF8000000000000ull + +static inline vector double +_frexpd2 (vector double x, vector signed long long *pexp) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull}; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, isinf, iszero; + vec_ullong2 e0, x0, x1; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, sh, ex; + vec_double2 fr, frac = (vec_double2)zero; + + //NAN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) ); + isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even ); + frac = spu_sel( frac, (vec_double2)spu_splats(__FREXPD_DBL_NAN), isnan ); + + //INF: x is infinite (all-ones exponent and zero mantissa) + isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + frac = spu_sel( frac, x , isinf ); + + //x is zero (zero exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); + + frac = spu_sel( frac, (vec_double2)zero , iszero ); + *pexp = spu_sel( *pexp, (vec_llong2)zero , iszero ); + + //Integer Exponent: if x is normal or subnormal + + //...shift left to normalize fraction, zero shift if normal + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) ); + sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) ); + + x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) ); + x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) ); + fr = (vec_double2)spu_sel( x1, x0, maskdw ); + fr = spu_sel( fr, 
(vec_double2)spu_splats(0x3FE0000000000000ull), expn ); + fr = spu_sel( fr, x, sign ); + + e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 ); + ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) ); + + frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) ); + *pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) ); + + return frac; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/frexpf4.c b/Extras/simdmathlibrary/spu/simdmath/frexpf4.h similarity index 86% rename from Extras/simdmathlibrary/spu/frexpf4.c rename to Extras/simdmathlibrary/spu/simdmath/frexpf4.h index a1c17b335..698f79028 100644 --- a/Extras/simdmathlibrary/spu/frexpf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/frexpf4.h @@ -27,21 +27,26 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_FREXPF4_H___ +#define ___SIMD_MATH_FREXPF4_H___ + #include #include -vector float -frexpf4 (vector float x, vector signed int *pexp) +static inline vector float +_frexpf4 (vector float x, vector signed int *pexp) { vec_int4 zeros = spu_splats((int)0); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros); - vec_int4 expmask = spu_splats((int)0x7F800000); - vec_int4 e1 = spu_and((vec_int4)x, expmask); + vec_uint4 expmask = spu_splats(0x7F800000U); + vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask); vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126)); *pexp = spu_sel(e2, zeros, zeromask); - vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask); return spu_sel(m2, (vec_float4)zeros, zeromask); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/hypotd2.h b/Extras/simdmathlibrary/spu/simdmath/hypotd2.h new file mode 100644 index 000000000..e2676d4b5 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/hypotd2.h @@ -0,0 +1,47 @@ +/* hypotd2 - for each element of vector x and y, return the square root of (x')^2 + (y')^2 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
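Aside: a usage sketch for the _frexpd2 inline defined earlier, hand-computed: 8.0 decomposes as 0.5 * 2^4. The exponent vector is pre-initialized because the implementation merges into *pexp with selects rather than overwriting it unconditionally.

    #include <spu_intrinsics.h>
    #include <simdmath/frexpd2.h>

    static void frexpd2_example(void)
    {
      vec_llong2  e = (vec_llong2){ 0, 0 };
      vec_double2 f = _frexpd2(spu_splats(8.0), &e);
      /* f == { 0.5, 0.5 }, e == { 4, 4 } */
      (void)f;
    }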
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_HYPOTD2_H___ +#define ___SIMD_MATH_HYPOTD2_H___ + +#include +#include + +#include + +static inline vector double +_hypotd2 (vector double x, vector double y) +{ + vec_double2 sum = spu_mul(x,x); + sum = spu_madd(y,y,sum); + + return _sqrtd2(sum); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/hypotf4.c b/Extras/simdmathlibrary/spu/simdmath/hypotf4.h similarity index 90% rename from Extras/simdmathlibrary/spu/hypotf4.c rename to Extras/simdmathlibrary/spu/simdmath/hypotf4.h index 4b7336a78..b607a847e 100644 --- a/Extras/simdmathlibrary/spu/hypotf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/hypotf4.h @@ -27,14 +27,21 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_HYPOTF4_H___ +#define ___SIMD_MATH_HYPOTF4_H___ + #include #include -vector float -hypotf4 (vector float x, vector float y) +#include + +static inline vector float +_hypotf4 (vector float x, vector float y) { vec_float4 sum = spu_mul(x,x); sum = spu_madd(y,y,sum); - return sqrtf4(sum); + return _sqrtf4(sum); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/ilogbd2.h b/Extras/simdmathlibrary/spu/simdmath/ilogbd2.h new file mode 100644 index 000000000..91039259e --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/ilogbd2.h @@ -0,0 +1,83 @@ +/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
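Aside: _hypotd2 squares and sums before taking the square root, so unlike a fully guarded C99 hypot it can overflow or underflow for extreme magnitudes; within range it behaves as expected. A hand-computed sketch on two Pythagorean triples, with the include path assumed from the renames:

    #include <spu_intrinsics.h>
    #include <simdmath/hypotd2.h>

    static void hypotd2_example(void)
    {
      vec_double2 x = (vec_double2){ 3.0,  5.0 };
      vec_double2 y = (vec_double2){ 4.0, 12.0 };
      vec_double2 h = _hypotd2(x, y);   /* { 5.0, 13.0 } */
      (void)h;
    }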
+ */ + +#ifndef ___SIMD_MATH_ILOGBD2_H___ +#define ___SIMD_MATH_ILOGBD2_H___ + +#include +#include +#include +#include + +static inline vector signed long long +_ilogbd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, iszeroinf; + vec_llong2 ilogb = (vec_llong2)zero; + vec_llong2 e1, e2; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, lz1; + + //FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan ); + + //FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ), + spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) ); + ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf ); + + //Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x + e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); + e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 ); + + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); + lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); + + ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) ); + + return ilogb; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/ilogbf4.c b/Extras/simdmathlibrary/spu/simdmath/ilogbf4.h similarity index 91% rename from Extras/simdmathlibrary/spu/ilogbf4.c rename to Extras/simdmathlibrary/spu/simdmath/ilogbf4.h index e65197f19..6ebd88a83 100644 --- a/Extras/simdmathlibrary/spu/ilogbf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/ilogbf4.h @@ -27,22 +27,24 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifndef ___SIMD_MATH_ILOGBF4_H___ +#define ___SIMD_MATH_ILOGBF4_H___ + #include <simdmath.h> #include <spu_intrinsics.h> +#include <limits.h> #include <math.h> -#ifndef FP_ILOGB0 -#define FP_ILOGB0 ((int)0x80000001) -#endif - -vector signed int -ilogbf4 (vector float x) +static inline vector signed int +_ilogbf4 (vector float x) { vec_int4 minus127 = spu_splats((int)-127); vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000)); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0); + vec_uint4 zeromask = spu_cmpeq(e1, 0); vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127); return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/irintf4.h b/Extras/simdmathlibrary/spu/simdmath/irintf4.h new file mode 100644 index 000000000..4c56db686 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/irintf4.h @@ -0,0 +1,45 @@ +/* irintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding mode. + On SPU, the rounding mode for float is always towards zero. + vector signed int is returned. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_IRINTF4_H___ +#define ___SIMD_MATH_IRINTF4_H___ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +static inline vector signed int +_irintf4(vector float in) +{ + return spu_convts(in,0); +} + +#endif
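Aside: since SPU single-precision arithmetic always rounds toward zero, the "nearest integer consistent with the rounding mode" of _irintf4 is simply truncation, hence the one-line spu_convts body above. A hand-computed sketch, with the include path assumed from the renames:

    #include <spu_intrinsics.h>
    #include <simdmath/irintf4.h>

    static void irintf4_example(void)
    {
      vec_int4 r = _irintf4((vec_float4){ 2.7f, -2.7f, 0.5f, -0.5f });
      /* toward zero: { 2, -2, 0, 0 } */
      (void)r;
    }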
diff --git a/Extras/simdmathlibrary/spu/simdmath/iroundf4.h b/Extras/simdmathlibrary/spu/simdmath/iroundf4.h new file mode 100644 index 000000000..c2cc36cf4 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/iroundf4.h @@ -0,0 +1,61 @@ +/* iroundf4 - for each of four float slots, round to the nearest integer, + halfway cases are rounded away from zero. + + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_IROUNDF4_H___ +#define ___SIMD_MATH_IROUNDF4_H___ + +#include <simdmath.h> +#include <spu_intrinsics.h> + +static inline vector signed int +_iroundf4(vector float in) +{ + vec_int4 exp, out; + vec_uint4 addend; + + /* Add 0.5 (fixed precision) to eliminate rounding issues + */ + exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF)); + + addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp), + spu_cmpgt((vec_uint4)exp, -31)); + + in = (vec_float4)spu_add((vec_uint4)in, addend); + + + /* Truncate the result. + */ + out = spu_convts(in,0); + + return (out); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/is0denormd2.h b/Extras/simdmathlibrary/spu/simdmath/is0denormd2.h new file mode 100644 index 000000000..fcc76ce08 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/is0denormd2.h @@ -0,0 +1,51 @@ +/* is0denormd2 - for each of two double slots, if input equals 0 or denorm return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
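Aside: a worked instance of _iroundf4's fixed-point trick above, hand-computed. For in = 2.5f the biased exponent field is 128, so exp = 125 - 128 = -3 and addend = 0x1000000 >> 3 = 0x200000; adding that to the raw bits raises the significand by exactly 0.5 (giving 3.0f), and truncation then yields 3. Because the addition acts on the magnitude bits, halfway cases move away from zero for either sign.

    #include <spu_intrinsics.h>
    #include <simdmath/iroundf4.h>

    static void iroundf4_example(void)
    {
      vec_int4 r = _iroundf4((vec_float4){ 2.5f, -2.5f, 1.4f, -1.4f });
      /* halfway away from zero: { 3, -3, 1, -1 } */
      (void)r;
    }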
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_IS0DENORMD2_H___ +#define ___SIMD_MATH_IS0DENORMD2_H___ + +#include +#include + + +static inline vector unsigned long long +_is0denormd2 (vector double x) +{ + vec_double2 xexp; + vec_ullong2 cmp; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + + xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) ); + cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) ); + cmp = spu_shuffle( cmp, cmp, even ); + + return cmp; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/is0denormf4.h b/Extras/simdmathlibrary/spu/simdmath/is0denormf4.h new file mode 100644 index 000000000..97549d7ed --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/is0denormf4.h @@ -0,0 +1,42 @@ +/* is0denormf4 - for each element of vector x, return a mask of ones if x' is zero or denorm, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___SIMD_MATH_IS0DENORMF4_H___ +#define ___SIMD_MATH_IS0DENORMF4_H___ + +#include +#include + +static inline vector unsigned int +_is0denormf4 (vector float x) +{ + return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/isequald2.c b/Extras/simdmathlibrary/spu/simdmath/isequald2.h similarity index 60% rename from Extras/simdmathlibrary/spu/isequald2.c rename to Extras/simdmathlibrary/spu/simdmath/isequald2.h index 01a451d40..edf48c43d 100644 --- a/Extras/simdmathlibrary/spu/isequald2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isequald2.h @@ -27,28 +27,35 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISEQUALD2_H___ +#define ___SIMD_MATH_ISEQUALD2_H___ + #include #include -vector unsigned long long -isequald2 (vector double x, vector double y) +#include + +static inline vector unsigned long long +_isequald2 (vector double x, vector double y) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; - vec_ullong2 bothzero; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; + vec_ullong2 bothzero; - cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); - return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero), - spu_or( isnand2( x ), isnand2( y ) ) ); + return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero), + spu_or( _isnand2( x ), _isnand2( y ) ) ); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isequalf4.h b/Extras/simdmathlibrary/spu/simdmath/isequalf4.h new file mode 100644 index 000000000..8af4d7042 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isequalf4.h @@ -0,0 +1,42 @@ +/* isequalf4 - for each element of vector x and y, return a mask of ones if x' is equal to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
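Aside: the bothzero term in _isequald2 above makes +0.0 and -0.0 compare equal even though their bit patterns differ, while the _isnand2 terms force NaN lanes to compare unequal. A hand-computed sketch, with the include path assumed from the renames:

    #include <spu_intrinsics.h>
    #include <simdmath/isequald2.h>

    static void isequald2_example(void)
    {
      vec_double2 x = (vec_double2){  0.0, 1.0 };
      vec_double2 y = (vec_double2){ -0.0, 1.0 };
      vec_ullong2 m = _isequald2(x, y);   /* both lanes all-ones */
      (void)m;
    }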
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISEQUALF4_H___ +#define ___SIMD_MATH_ISEQUALF4_H___ + +#include +#include + +static inline vector unsigned int +_isequalf4 (vector float x, vector float y) +{ + return spu_cmpeq(x, y); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isfinited2.h b/Extras/simdmathlibrary/spu/simdmath/isfinited2.h new file mode 100644 index 000000000..4efc9c802 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isfinited2.h @@ -0,0 +1,51 @@ +/* isfinited2 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISFINITED2_H___ +#define ___SIMD_MATH_ISFINITED2_H___ + +#include +#include + +static inline vector unsigned long long +_isfinited2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 cmpr; + + //Finite unless NaN or Inf, check for 'not all-ones exponent' + + cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ); + cmpr = spu_shuffle( cmpr, cmpr, even); + + return cmpr; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isfinitef4.h b/Extras/simdmathlibrary/spu/simdmath/isfinitef4.h new file mode 100644 index 000000000..7846f2e51 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isfinitef4.h @@ -0,0 +1,45 @@ +/* isfinitef4 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
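Aside: _isfinited2 above only needs to test for a not-all-ones exponent field, since NaN and Inf are exactly the encodings with exponent 0x7ff. A sketch that builds +Inf from its bit pattern:

    #include <spu_intrinsics.h>
    #include <simdmath/isfinited2.h>

    static void isfinited2_example(void)
    {
      vec_double2 v = (vec_double2)(vec_ullong2){ 0x3ff0000000000000ull,   /* 1.0  */
                                                  0x7ff0000000000000ull }; /* +Inf */
      vec_ullong2 m = _isfinited2(v);   /* { all-ones, 0 } */
      (void)m;
    }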
+ */ + +#ifndef ___SIMD_MATH_ISFINITEF4_H___ +#define ___SIMD_MATH_ISFINITEF4_H___ + +#include +#include + +static inline vector unsigned int +_isfinitef4 (vector float x) +{ + (void)x; + + // NaN, INF not supported on SPU, result always a mask of ones + return spu_splats((unsigned int)0xffffffff); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/isgreaterd2.c b/Extras/simdmathlibrary/spu/simdmath/isgreaterd2.h similarity index 50% rename from Extras/simdmathlibrary/spu/isgreaterd2.c rename to Extras/simdmathlibrary/spu/simdmath/isgreaterd2.h index 8aee1e27e..d4fa0c7ca 100644 --- a/Extras/simdmathlibrary/spu/isgreaterd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isgreaterd2.h @@ -27,39 +27,45 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISGREATERD2_H___ +#define ___SIMD_MATH_ISGREATERD2_H___ + #include #include -vector unsigned long long -isgreaterd2 (vector double x, vector double y) +#include + +static inline vector unsigned long long +_isgreaterd2 (vector double x, vector double y) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; - vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; - vec_ullong2 bothneg, bothzero; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; - cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); - cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); - cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), - spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); - cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); - cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); - bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); - bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); - bothneg = spu_shuffle( 
bothneg, bothneg, even ); + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); - return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), - spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); + return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), + spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) ); } +#endif diff --git a/Extras/simdmathlibrary/spu/isgreaterequald2.c b/Extras/simdmathlibrary/spu/simdmath/isgreaterequald2.h similarity index 50% rename from Extras/simdmathlibrary/spu/isgreaterequald2.c rename to Extras/simdmathlibrary/spu/simdmath/isgreaterequald2.h index 4de07d1a9..5f1498841 100644 --- a/Extras/simdmathlibrary/spu/isgreaterequald2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isgreaterequald2.h @@ -27,41 +27,47 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISGREATEREQUALD2_H___ +#define ___SIMD_MATH_ISGREATEREQUALD2_H___ + #include #include -vector unsigned long long -isgreaterequald2 (vector double x, vector double y) +#include + +static inline vector unsigned long long +_isgreaterequald2 (vector double x, vector double y) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; - vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; - vec_ullong2 bothneg, bothzero; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; - cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); - cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); - cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), - spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); - cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); - cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, 
spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); - cmpeq_ll = spu_or( cmpeq_ll, bothzero); + cmpeq_ll = spu_or( cmpeq_ll, bothzero); - bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); - bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); - bothneg = spu_shuffle( bothneg, bothneg, even ); + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); - return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ), - spu_or( isnand2 ( x ), isnand2 ( y ) ) ); + return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ), + spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isgreaterequalf4.h b/Extras/simdmathlibrary/spu/simdmath/isgreaterequalf4.h new file mode 100644 index 000000000..c95233499 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isgreaterequalf4.h @@ -0,0 +1,46 @@ +/* isgreaterequalf4 - for each element of vector x and y, return a mask of ones if x' is greater than or equal to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISGREATEREQUALF4_H___ +#define ___SIMD_MATH_ISGREATEREQUALF4_H___ + +#include +#include + +static inline vector unsigned int +_isgreaterequalf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpgt(y, x); + + return spu_nor(var, var); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isgreaterf4.h b/Extras/simdmathlibrary/spu/simdmath/isgreaterf4.h new file mode 100644 index 000000000..13c4c00c3 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isgreaterf4.h @@ -0,0 +1,42 @@ +/* isgreaterf4 - for each element of vector x and y, return a mask of ones if x' is greater than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
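Aside: the bothneg select in the double-precision comparison inlines above exists because IEEE doubles are sign-magnitude: for two negative values the raw integer ordering is reversed, so the less-than result is swapped in whenever both sign bits are set. A hand-computed sketch:

    #include <spu_intrinsics.h>
    #include <simdmath/isgreaterequald2.h>

    static void isgreaterequald2_example(void)
    {
      vec_double2 x = (vec_double2){ -1.0, 2.0 };
      vec_double2 y = (vec_double2){ -2.0, 2.0 };
      vec_ullong2 m = _isgreaterequald2(x, y);   /* { all-ones, all-ones } */
      (void)m;
    }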
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISGREATERF4_H___ +#define ___SIMD_MATH_ISGREATERF4_H___ + +#include +#include + +static inline vector unsigned int +_isgreaterf4 (vector float x, vector float y) +{ + return spu_cmpgt(x, y); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isinfd2.h b/Extras/simdmathlibrary/spu/simdmath/isinfd2.h new file mode 100644 index 000000000..ba4d608a7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isinfd2.h @@ -0,0 +1,51 @@ +/* isinfd2 - for each of two double slots, if input equals +Inf or -Inf return mask of ones, else 0 + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISINFD2_H___ +#define ___SIMD_MATH_ISINFD2_H___ + +#include +#include + + +static inline vector unsigned long long +_isinfd2 (vector double x) +{ + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_double2 xabs; + vec_ullong2 cmp; + + xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) ); + cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) ); + + return cmp; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isinff4.h b/Extras/simdmathlibrary/spu/simdmath/isinff4.h new file mode 100644 index 000000000..a3872d768 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isinff4.h @@ -0,0 +1,45 @@ +/* isinff4 - for each element of vector x, return a mask of ones if x' is INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
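Aside: _isinfd2 above clears the sign bit and compares the remainder against the Inf encoding, so both +Inf and -Inf are caught. A hand-computed sketch:

    #include <spu_intrinsics.h>
    #include <simdmath/isinfd2.h>

    static void isinfd2_example(void)
    {
      vec_double2 v = (vec_double2)(vec_ullong2){ 0x3ff0000000000000ull,   /* 1.0  */
                                                  0xfff0000000000000ull }; /* -Inf */
      vec_ullong2 m = _isinfd2(v);   /* { 0, all-ones } */
      (void)m;
    }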
+ */ + +#ifndef ___SIMD_MATH_ISINFF4_H___ +#define ___SIMD_MATH_ISINFF4_H___ + +#include +#include + +static inline vector unsigned int +_isinff4 (vector float x) +{ + (void)x; + + // INF not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/islessd2.c b/Extras/simdmathlibrary/spu/simdmath/islessd2.h similarity index 50% rename from Extras/simdmathlibrary/spu/islessd2.c rename to Extras/simdmathlibrary/spu/simdmath/islessd2.h index 7ab81c1de..9278e0e4e 100644 --- a/Extras/simdmathlibrary/spu/islessd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/islessd2.h @@ -27,38 +27,45 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISLESSD2_H___ +#define ___SIMD_MATH_ISLESSD2_H___ + #include #include -vector unsigned long long -islessd2 (vector double x, vector double y) +#include + +static inline vector unsigned long long +_islessd2 (vector double x, vector double y) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; - vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; - vec_ullong2 bothneg, bothzero; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; - cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x ); - cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x ); - cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x ); + cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x ); + cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x ); + cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x ); - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), - spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); - cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); - cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); - bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); - bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); - bothneg = spu_shuffle( bothneg, bothneg, even ); + bothneg = spu_and( 
(vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); - return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), - spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); + return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ), + spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) ); } + +#endif diff --git a/Extras/simdmathlibrary/spu/islessequald2.c b/Extras/simdmathlibrary/spu/simdmath/islessequald2.h similarity index 50% rename from Extras/simdmathlibrary/spu/islessequald2.c rename to Extras/simdmathlibrary/spu/simdmath/islessequald2.h index f09f245fb..51148cb1f 100644 --- a/Extras/simdmathlibrary/spu/islessequald2.c +++ b/Extras/simdmathlibrary/spu/simdmath/islessequald2.h @@ -27,40 +27,47 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISLESSEQUALD2_H___ +#define ___SIMD_MATH_ISLESSEQUALD2_H___ + #include #include -vector unsigned long long -islessequald2 (vector double x, vector double y) +#include + +static inline vector unsigned long long +_islessequald2 (vector double x, vector double y) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; - vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; - vec_ullong2 bothneg, bothzero; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even; + vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll; + vec_ullong2 bothneg, bothzero; - cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); - cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); - cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); + cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y ); - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), - spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); - cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); - cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ), + spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) ); + cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) ); + cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll ); - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); - cmpeq_ll = spu_or( cmpeq_ll, 
bothzero); + cmpeq_ll = spu_or( cmpeq_ll, bothzero); - bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); - bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); - bothneg = spu_shuffle( bothneg, bothneg, even ); + bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y ); + bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg ); + bothneg = spu_shuffle( bothneg, bothneg, even ); - return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll), - spu_or( isnand2 ( x ), isnand2 ( y ) ) ); + return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll), + spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/islessequalf4.h b/Extras/simdmathlibrary/spu/simdmath/islessequalf4.h new file mode 100644 index 000000000..5e6b5f50b --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/islessequalf4.h @@ -0,0 +1,46 @@ +/* islessequalf4 - for each element of vector x and y, return a mask of ones if x' is less than or equal to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISLESSEQUALF4_H___ +#define ___SIMD_MATH_ISLESSEQUALF4_H___ + +#include +#include + +static inline vector unsigned int +_islessequalf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpgt(x, y); + + return spu_nor(var, var); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/islessf4.h b/Extras/simdmathlibrary/spu/simdmath/islessf4.h new file mode 100644 index 000000000..ca84fae93 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/islessf4.h @@ -0,0 +1,42 @@ +/* islessf4 - for each element of vector x and y, return a mask of ones if x' is less than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISLESSF4_H___ +#define ___SIMD_MATH_ISLESSF4_H___ + +#include +#include + +static inline vector unsigned int +_islessf4 (vector float x, vector float y) +{ + return spu_cmpgt(y, x); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/islessgreaterd2.c b/Extras/simdmathlibrary/spu/simdmath/islessgreaterd2.h similarity index 60% rename from Extras/simdmathlibrary/spu/islessgreaterd2.c rename to Extras/simdmathlibrary/spu/simdmath/islessgreaterd2.h index 89d4b90e7..7c23c8bdb 100644 --- a/Extras/simdmathlibrary/spu/islessgreaterd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/islessgreaterd2.h @@ -27,29 +27,35 @@ POSSIBILITY OF SUCH DAMAGE. 
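The single-precision predicates above reduce to one compare because SPU floats carry no NaN: _islessf4(x, y) is exactly spu_cmpgt(y, x). The all-ones/all-zeros lane masks these predicates return are meant to feed spu_sel; a minimal sketch of that idiom follows (the minf4_sketch helper is hypothetical, assuming an SPU toolchain and that these headers install under simdmath/):

#include <spu_intrinsics.h>
#include <simdmath/islessf4.h>

// Hypothetical element-wise minimum built on the predicate above:
// spu_sel takes its third operand's bits where the mask is all ones.
static inline vector float
minf4_sketch (vector float x, vector float y)
{
  return spu_sel(y, x, _islessf4(x, y));   // x wherever x < y, else y
}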
*/ +#ifndef ___SIMD_MATH_ISLESSGREATERD2_H___ +#define ___SIMD_MATH_ISLESSGREATERD2_H___ + #include #include -vector unsigned long long -islessgreaterd2 (vector double x, vector double y) -{ - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; - vec_ullong2 sign = spu_splats(0x8000000000000000ull); - vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; - vec_ullong2 bothzero; - - cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); +#include - cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); - cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); +static inline vector unsigned long long +_islessgreaterd2 (vector double x, vector double y) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd; + vec_ullong2 bothzero; - bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); - bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); - bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y ); + + cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even ); + cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd ); - return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd), - spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) ); + bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign ); + bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U ); + bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) ); + + return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd), + spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) ); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/islessgreaterf4.h b/Extras/simdmathlibrary/spu/simdmath/islessgreaterf4.h new file mode 100644 index 000000000..0143b49df --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/islessgreaterf4.h @@ -0,0 +1,46 @@ +/* islessgreaterf4 - for each element of vector x and y, return a mask of ones if x' is less than or greater than y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISLESSGREATERF4_H___ +#define ___SIMD_MATH_ISLESSGREATERF4_H___ + +#include +#include + +static inline vector unsigned int +_islessgreaterf4 (vector float x, vector float y) +{ + vec_uint4 var; + + var = spu_cmpeq(x, y); + + return spu_nor(var, var); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/isnand2.c b/Extras/simdmathlibrary/spu/simdmath/isnand2.h similarity index 67% rename from Extras/simdmathlibrary/spu/isnand2.c rename to Extras/simdmathlibrary/spu/simdmath/isnand2.h index 12e7c3e77..eabb20c53 100644 --- a/Extras/simdmathlibrary/spu/isnand2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isnand2.h @@ -27,26 +27,30 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISNAND2_H___ +#define ___SIMD_MATH_ISNAND2_H___ + #include #include -vector unsigned long long -isnand2 (vector double x) +static inline vector unsigned long long +_isnand2 (vector double x) { - vec_double2 xneg; - vec_ullong2 cmpgt, cmpeq, cmpnan; - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull); + vec_double2 xneg; + vec_ullong2 cmpgt, cmpeq, cmpnan; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull); - xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); - cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask ); - cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask ); + xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask ); + cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask ); - cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ), - spu_and( spu_shuffle( cmpeq, cmpeq, even ), - spu_shuffle( cmpgt, cmpgt, odd ) ) ); + cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); - return cmpnan; + return cmpnan; } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isnanf4.h b/Extras/simdmathlibrary/spu/simdmath/isnanf4.h new file mode 100644 index 000000000..6dabfd0a6 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isnanf4.h @@ -0,0 +1,45 @@ +/* isnanf4 - for each element of vector x, return a mask of ones if x' is NaN, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISNANF4_H___ +#define ___SIMD_MATH_ISNANF4_H___ + +#include +#include + +static inline vector unsigned int +_isnanf4 (vector float x) +{ + (void)x; + + // NaN not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/isnormald2.c b/Extras/simdmathlibrary/spu/simdmath/isnormald2.h similarity index 72% rename from Extras/simdmathlibrary/spu/isnormald2.c rename to Extras/simdmathlibrary/spu/simdmath/isnormald2.h index ae2897402..72d6c4d51 100644 --- a/Extras/simdmathlibrary/spu/isnormald2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isnormald2.h @@ -27,23 +27,27 @@ POSSIBILITY OF SUCH DAMAGE. 
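Because the SPU has no double-precision compare instructions, _isnand2 above classifies NaNs purely by bit pattern: with the sign bit forced on, NaNs, and only NaNs, compare strictly greater than the -Inf pattern 0xfff0000000000000 as unsigned integers. A scalar sketch of the same test, assuming IEEE-754 doubles (isnan_bits is illustrative, not part of the library):

#include <stdint.h>
#include <string.h>

// x is NaN iff the exponent field is all ones and the mantissa is non-zero;
// forcing the sign bit folds +NaN and -NaN into one range above 0xfff0...0.
static int isnan_bits (double x)
{
  uint64_t u;
  memcpy(&u, &x, sizeof u);                       // reinterpret the bits
  return (u | 0x8000000000000000ull) > 0xfff0000000000000ull;
}

The vector version synthesizes this 64-bit unsigned compare from 32-bit spu_cmpgt/spu_cmpeq halves merged through the even/odd shuffle patterns.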
*/ +#ifndef ___SIMD_MATH_ISNORMALD2_H___ +#define ___SIMD_MATH_ISNORMALD2_H___ + #include #include -vector unsigned long long -isnormald2 (vector double x) +static inline vector unsigned long long +_isnormald2 (vector double x) { - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); - vec_ullong2 cmpr; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 cmpr; - //Normal unless nan, infinite, denorm, or zero + //Normal unless nan, infinite, denorm, or zero - //Check for 'not zero or all-ones exponent' - cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ), - spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) ); - cmpr = spu_shuffle( cmpr, cmpr, even); + //Check for 'not zero or all-ones exponent' + cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ), + spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) ); + cmpr = spu_shuffle( cmpr, cmpr, even); - return cmpr; + return cmpr; } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isnormalf4.h b/Extras/simdmathlibrary/spu/simdmath/isnormalf4.h new file mode 100644 index 000000000..b0420d56d --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isnormalf4.h @@ -0,0 +1,43 @@ +/* isnormalf4 - for each element of vector x, return a mask of ones if x' is normal, not a NaN or INF, zero otherwise + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
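_isnormald2 above implements the textbook classification: a double is normal exactly when its biased exponent field is neither all zeros (zero or denormal) nor all ones (Inf or NaN). A scalar equivalent for reference, assuming IEEE-754 layout (illustrative only):

#include <stdint.h>
#include <string.h>

static int isnormal_bits (double x)
{
  uint64_t u, e;
  memcpy(&u, &x, sizeof u);
  e = u & 0x7ff0000000000000ull;                 // biased exponent field
  return e != 0 && e != 0x7ff0000000000000ull;   // not zero/denorm, not Inf/NaN
}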
+ */ + +#ifndef ___SIMD_MATH_ISNORMALF4_H___ +#define ___SIMD_MATH_ISNORMALF4_H___ + +#include +#include + +static inline vector unsigned int +_isnormalf4 (vector float x) +{ + // NaN, INF not supported on SPU; normal unless zero + return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000)); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/isunorderedd2.c b/Extras/simdmathlibrary/spu/simdmath/isunorderedd2.h similarity index 64% rename from Extras/simdmathlibrary/spu/isunorderedd2.c rename to Extras/simdmathlibrary/spu/simdmath/isunorderedd2.h index ffcb3369a..f5bd7b471 100644 --- a/Extras/simdmathlibrary/spu/isunorderedd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/isunorderedd2.h @@ -27,37 +27,41 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ISUNORDEREDD2_H___ +#define ___SIMD_MATH_ISUNORDEREDD2_H___ + #include #include -vector unsigned long long -isunorderedd2 (vector double x, vector double y) +static inline vector unsigned long long +_isunorderedd2 (vector double x, vector double y) { - vec_double2 neg; - vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany; - vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; - vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull); - vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull); + vec_double2 neg; + vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany; + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull); + vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull); - //Check if x is nan - neg = (vec_double2)spu_or( (vec_ullong2)x, sign ); - cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); - cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); + //Check if x is nan + neg = (vec_double2)spu_or( (vec_ullong2)x, sign ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); + cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); - cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ), spu_and( spu_shuffle( cmpeq, cmpeq, even ), spu_shuffle( cmpgt, cmpgt, odd ) ) ); - //Check if y is nan - neg = (vec_double2)spu_or( (vec_ullong2)y, sign ); - cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); - cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); + //Check if y is nan + neg = (vec_double2)spu_or( (vec_ullong2)y, sign ); + cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn ); + cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn ); - cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ), + cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ), spu_and( spu_shuffle( cmpeq, cmpeq, even ), spu_shuffle( cmpgt, cmpgt, odd ) ) ); - return spu_or( cmpnanx, cmpnany ); + return spu_or( cmpnanx, cmpnany ); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/isunorderedf4.h b/Extras/simdmathlibrary/spu/simdmath/isunorderedf4.h new file mode 100644 index 000000000..917bf4ad9 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/isunorderedf4.h @@ -0,0 +1,46 @@ +/* isunorderedf4 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise + Copyright (C) 2006, 2007 Sony Computer 
Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_ISUNORDEREDF4_H___ +#define ___SIMD_MATH_ISUNORDEREDF4_H___ + +#include +#include + +static inline vector unsigned int +_isunorderedf4 (vector float x, vector float y) +{ + (void)x; + (void)y; + + // NaN not supported on SPU, result always zero + return spu_splats((unsigned int)0x00000000); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/ldexpd2.c b/Extras/simdmathlibrary/spu/simdmath/ldexpd2.h similarity index 95% rename from Extras/simdmathlibrary/spu/ldexpd2.c rename to Extras/simdmathlibrary/spu/simdmath/ldexpd2.h index 9fa555616..a5d9a0e9b 100644 --- a/Extras/simdmathlibrary/spu/ldexpd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/ldexpd2.h @@ -29,17 +29,20 @@ POSSIBILITY OF SUCH DAMAGE. 
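_isunorderedf4, like _isnanf4 and _isinff4 earlier, is a compile-time constant because SPU single precision has no NaN or Inf encodings: every pair of floats is ordered. One practical consequence, sketched below, is that an IEEE-style unordered guard costs nothing on this target (the less_ordered wrapper is hypothetical, assuming the headers install under simdmath/):

#include <spu_intrinsics.h>
#include <simdmath/islessf4.h>
#include <simdmath/isunorderedf4.h>

// On IEEE targets the andc would clear unordered lanes; here the unordered
// mask is always zero, so the compare result passes through unchanged.
static inline vector unsigned int
less_ordered (vector float x, vector float y)
{
  return spu_andc(_islessf4(x, y), _isunorderedf4(x, y));
}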
*/ +#ifndef ___SIMD_MATH_LDEXPD2_H___ +#define ___SIMD_MATH_LDEXPD2_H___ + #include #include -vector double -ldexpd2(vector double x, vector signed long long ex) +static inline vector double +_ldexpd2(vector double x, vector signed long long ex) { vec_int4 e1, e2; vec_int4 min = spu_splats(-2099); -// vec_int4 min = spu_splats(-2044); + // vec_int4 min = spu_splats(-2044); vec_int4 max = spu_splats( 2098); -// vec_int4 max = spu_splats( 2046); + // vec_int4 max = spu_splats( 2046); vec_uint4 cmp_min, cmp_max; vec_uint4 shift = ((vec_uint4){20, 32, 20, 32}); vec_double2 f1, f2; @@ -83,7 +86,7 @@ ldexpd2(vector double x, vector signed long long ex) /* Compute the product x * 2^e1 * 2^e2 */ -// out = spu_mul(spu_mul(x, f1), f2); + // out = spu_mul(spu_mul(x, f1), f2); // check floating point register DENORM bit vec_uint4 fpscr0, fpscr; @@ -159,7 +162,7 @@ ldexpd2(vector double x, vector signed long long ex) maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046)); maxmask = spu_shuffle(maxmask, maxmask, splat_msb); -// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL))); + // maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL))); minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum)); minmask = spu_shuffle(minmask, minmask, splat_msb); @@ -245,9 +248,9 @@ ldexpd2(vector double x, vector signed long long ex) vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23})); // select right answer - x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask); - x = spu_sel(x, (vec_double2)zeros, minmask); - x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask); + x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask); + x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask); + x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask); out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange); @@ -260,4 +263,4 @@ ldexpd2(vector double x, vector signed long long ex) return out; } - +#endif diff --git a/Extras/simdmathlibrary/spu/ldexpf4.c b/Extras/simdmathlibrary/spu/simdmath/ldexpf4.h similarity index 79% rename from Extras/simdmathlibrary/spu/ldexpf4.c rename to Extras/simdmathlibrary/spu/simdmath/ldexpf4.h index 1f5bffcac..d6984575b 100644 --- a/Extras/simdmathlibrary/spu/ldexpf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/ldexpf4.h @@ -27,26 +27,30 @@ POSSIBILITY OF SUCH DAMAGE. 
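_ldexpd2 above computes the standard ldexp semantics per slot, x * 2^ex with the exponent sum clamped to the representable range, and applies the scale as two partial factors (the x * 2^e1 * 2^e2 product in the code) so that an intermediate 2^e cannot overflow even when the final result is representable. The scalar behaviour it mirrors:

#include <math.h>
#include <stdio.h>

int main (void)
{
  printf("%g\n", ldexp(1.5, 10));     // 1536 = 1.5 * 2^10
  printf("%g\n", ldexp(8.0, -3));     // 1
  printf("%g\n", ldexp(1.0, 5000));   // saturates: inf on IEEE hosts
  return 0;
}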
*/ +#ifndef ___SIMD_MATH_LDEXPF4_H___ +#define ___SIMD_MATH_LDEXPF4_H___ + #include #include -vector float -ldexpf4 (vector float x, vector signed int exp) + +static inline vector float +_ldexpf4 (vector float x, vector signed int exp) { vec_int4 zeros = spu_splats(0); - vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000); + vec_uint4 expmask = spu_splats(0x7F800000U); vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask); vec_int4 e2 = spu_rlmask(e1,-23); - vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255); - vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp); - minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros)); + vec_uint4 maxmask = spu_cmpgt(exp, 255); + vec_uint4 minmask = spu_cmpgt(spu_splats(-255), exp); + minmask = spu_or (minmask, spu_cmpeq(x, (vec_float4)zeros)); vec_int4 esum = spu_add(e2, exp); - maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255)); - maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF)); - minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum)); + maxmask = spu_or (maxmask, spu_cmpgt(esum, 255)); + maxmask = spu_and(maxmask, spu_splats(0x7FFFFFFFU)); + minmask = spu_or (minmask, spu_cmpgt(zeros, esum)); x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask); x = spu_sel(x, (vec_float4)zeros, minmask); @@ -54,3 +58,5 @@ ldexpf4 (vector float x, vector signed int exp) x = spu_sel(x, (vec_float4)maxmask, maxmask); return x; } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/llabsi2.h b/Extras/simdmathlibrary/spu/simdmath/llabsi2.h new file mode 100644 index 000000000..304198887 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/llabsi2.h @@ -0,0 +1,50 @@ +/* llabsi2 - returns absolute value of input. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ___SIMD_MATH_LLABSI2_H___ +#define ___SIMD_MATH_LLABSI2_H___ + +#include +#include + +static inline vector signed long long +_llabsi2 (vector signed long long in) +{ + vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31); + sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + + vec_uint4 add_1 = ((vec_uint4){0,1,0,1}); + vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in); + res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4)); + res = spu_sel( (vec_uint4)in, res, sign); + + return ((vec_llong2)(res)); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/lldivi2.h b/Extras/simdmathlibrary/spu/simdmath/lldivi2.h new file mode 100644 index 000000000..c0a8ca2ae --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/lldivi2.h @@ -0,0 +1,85 @@ +/* lldivi2 - two-slot signed long long divide + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_LLDIVI2_H___ +#define ___SIMD_MATH_LLDIVI2_H___ + +#include +#include + +#include +#include + +static inline vector signed long long +__lldivi2_negatell2 (vector signed long long x) +{ + vector signed int zero = (vector signed int){0,0,0,0}; + vector signed int borrow; + + borrow = spu_genb(zero, (vec_int4)x); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0})); + return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow); +} + +// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of +// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
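+// Sign handling below follows C99 truncated division: the quotient is
+// negated exactly when the operand signs differ, and the remainder keeps
+// the numerator's sign, so quot * denom + rem == numer holds in each slot
+// (e.g. -7 / 2 gives quot = -3, rem = -1).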
+ +static inline lldivi2_t +_lldivi2 (vector signed long long numer, vector signed long long denom) +{ + lldivi2_t res; + lldivu2_t resAbs; + vec_ullong2 numerAbs, denomAbs; + vec_uint4 numerPos, denomPos, quotNeg; + + // Determine whether result needs sign change + + numerPos = spu_cmpgt((vec_int4)numer, -1); + numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + denomPos = spu_cmpgt((vec_int4)denom, -1); + denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8})); + quotNeg = spu_xor( numerPos, denomPos ); + + // Use absolute values of numerator, denominator + + numerAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(numer), numer, (vec_ullong2)numerPos); + denomAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(denom), denom, (vec_ullong2)denomPos); + + // Divide the absolute values, then restore the signs + + resAbs = _lldivu2(numerAbs, denomAbs); + res.quot = spu_sel((vec_llong2)resAbs.quot, __lldivi2_negatell2((vec_llong2)resAbs.quot), + (vec_ullong2)quotNeg); + res.rem = spu_sel(__lldivi2_negatell2((vec_llong2)resAbs.rem), (vec_llong2)resAbs.rem, + (vec_ullong2)numerPos); + + return res; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/lldivu2.c b/Extras/simdmathlibrary/spu/simdmath/lldivu2.h similarity index 60% rename from Extras/simdmathlibrary/spu/lldivu2.c rename to Extras/simdmathlibrary/spu/simdmath/lldivu2.h index 864d11191..f58c30510 100644 --- a/Extras/simdmathlibrary/spu/lldivu2.c +++ b/Extras/simdmathlibrary/spu/simdmath/lldivu2.h @@ -27,46 +27,51 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LLDIVU2_H___ +#define ___SIMD_MATH_LLDIVU2_H___ + #include #include -#include "lldiv.h" + +#include // lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of // numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator. -lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom) +static inline lldivu2_t +_lldivu2 (vector unsigned long long numer, vector unsigned long long denom) { - lldivu2_t res; - vec_uint4 denomZeros, numerZeros; - vec_int4 shift; - vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft; - vec_ullong2 quot, newQuot; - vec_ullong2 newNum, skip, cont; - int anyCont; + lldivu2_t res; + vec_uint4 denomZeros, numerZeros; + vec_int4 shift; + vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft; + vec_ullong2 quot, newQuot; + vec_ullong2 newNum, skip, cont; + int anyCont; - // Get difference of leading zeros. + // Get difference of leading zeros. - denomZeros = (vec_uint4)ll_spu_cntlz( denom ); - numerZeros = (vec_uint4)ll_spu_cntlz( numer ); + denomZeros = (vec_uint4)__ll_spu_cntlz( denom ); + numerZeros = (vec_uint4)__ll_spu_cntlz( numer ); - shift = (vec_int4)spu_sub( denomZeros, numerZeros ); + shift = (vec_int4)spu_sub( denomZeros, numerZeros ); - // Shift denom to align leading one with numerator's + // Shift denom to align leading one with numerator's - denomShifted = ll_spu_sl( denom, (vec_ullong2)shift ); - oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift ); - oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) ); + denomShifted = __ll_spu_sl( denom, (vec_ullong2)shift ); + oneShifted = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift ); + oneShifted = spu_sel( oneShifted, spu_splats(0ull), __ll_spu_cmpeq_zero( denom ) ); - // Shift left all leading zeros. + // Shift left all leading zeros.
- denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros ); - oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros ); + denomLeft = __ll_spu_sl( denom, (vec_ullong2)denomZeros ); + oneLeft = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros ); - quot = spu_splats(0ull); + quot = spu_splats(0ull); - do - { - cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) ); + do + { + cont = __ll_spu_cmpgt( oneShifted, spu_splats(0ull) ); anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 ); newQuot = spu_or( quot, oneShifted ); @@ -74,25 +79,26 @@ lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long de // Subtract shifted denominator from remaining numerator // when denominator is not greater. - skip = ll_spu_cmpgt( denomShifted, numer ); - newNum = ll_spu_sub( numer, denomShifted ); + skip = __ll_spu_cmpgt( denomShifted, numer ); + newNum = __ll_spu_sub( numer, denomShifted ); // If denominator is greater, next shift is one more, otherwise // next shift is number of leading zeros of remaining numerator. - numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip ); + numerZeros = (vec_uint4)spu_sel( __ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip ); shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros ); - oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift ); - denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift ); + oneShifted = __ll_spu_rlmask( oneLeft, (vec_ullong2)shift ); + denomShifted = __ll_spu_rlmask( denomLeft, (vec_ullong2)shift ); quot = spu_sel( newQuot, quot, skip ); numer = spu_sel( newNum, numer, spu_orc(skip,cont) ); - } - while ( anyCont ); + } + while ( anyCont ); - res.quot = quot; - res.rem = numer; - return res; + res.quot = quot; + res.rem = numer; + return res; } +#endif diff --git a/Extras/simdmathlibrary/spu/llrintd2.c b/Extras/simdmathlibrary/spu/simdmath/llrintd2.h similarity index 92% rename from Extras/simdmathlibrary/spu/llrintd2.c rename to Extras/simdmathlibrary/spu/simdmath/llrintd2.h index 155ec3490..02ebacdc2 100644 --- a/Extras/simdmathlibrary/spu/llrintd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/llrintd2.h @@ -28,6 +28,9 @@ POSSIBILITY OF SUCH DAMAGE. 
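The _lldivu2 loop above is a restoring shift-and-subtract divider: align the divisor's leading one under the dividend's, subtract wherever it fits while OR-ing the corresponding bit into the quotient, and walk the shift back down. A scalar sketch of the same algorithm, including the library's documented divide-by-zero convention (illustrative, assuming GCC's __builtin_clzll):

#include <stdint.h>

static uint64_t udiv64_sketch (uint64_t num, uint64_t den, uint64_t *rem)
{
  uint64_t quot = 0;
  if (den != 0) {
    int shift = __builtin_clzll(den) - __builtin_clzll(num | 1);
    if (shift < 0) shift = 0;
    for (; shift >= 0; --shift)
      if ((den << shift) <= num) {       // subtract where it fits
        num  -= den << shift;
        quot |= 1ull << shift;           // record the quotient bit
      }
  }
  *rem = num;    // den == 0: quot = 0, rem = numerator, as documented above
  return quot;
}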
*/ +#ifndef ___SIMD_MATH_LLRINTD2_H___ +#define ___SIMD_MATH_LLRINTD2_H___ + #include #include @@ -35,8 +38,8 @@ // Handles no exception // overflow will return unspecified data -vector signed long long -llrintd2 (vector double in) +static inline vector signed long long +_llrintd2 (vector double in) { int shift0, shift1; vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}); @@ -67,8 +70,8 @@ bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large); bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL)); -// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx, -// (vec_ullong2)spu_splats(0x8000000000000000ULL)); + // bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx, + // (vec_ullong2)spu_splats(0x8000000000000000ULL)); mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias)); /* Determine how many bits to shift the mantissa to correctly @@ -102,9 +105,11 @@ mant = spu_xor(mant, sign); borrow = spu_genb(mant, sign); borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){ - 4,5,6,7, 192,192,192,192, - 12,13,14,15, 192,192,192,192})); + 4,5,6,7, 192,192,192,192, + 12,13,14,15, 192,192,192,192})); mant = spu_subx(mant, sign, borrow); return ((vec_llong2)(mant)); } + +#endif diff --git a/Extras/simdmathlibrary/spu/llrintf4.c b/Extras/simdmathlibrary/spu/simdmath/llrintf4.h similarity index 97% rename from Extras/simdmathlibrary/spu/llrintf4.c rename to Extras/simdmathlibrary/spu/simdmath/llrintf4.h index a9d24c139..d00279b7a 100644 --- a/Extras/simdmathlibrary/spu/llrintf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/llrintf4.h @@ -28,6 +28,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LLRINTF4_H___ +#define ___SIMD_MATH_LLRINTF4_H___ + #include #include @@ -35,8 +38,8 @@ // Handles no exception // overflow will return unspecified data -llroundf4_t -llrintf4 (vector float in) +static inline llroundf4_t +_llrintf4 (vector float in) { llroundf4_t res; vec_int4 exp; @@ -100,3 +103,5 @@ return res; } + +#endif diff --git a/Extras/simdmathlibrary/spu/llroundd2.c b/Extras/simdmathlibrary/spu/simdmath/llroundd2.h similarity index 92% rename from Extras/simdmathlibrary/spu/llroundd2.c rename to Extras/simdmathlibrary/spu/simdmath/llroundd2.h index eaaab6e74..0efd4724b 100644 --- a/Extras/simdmathlibrary/spu/llroundd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/llroundd2.h @@ -28,6 +28,9 @@ POSSIBILITY OF SUCH DAMAGE.
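_llrintd2 above rounds without a float-to-integer instruction by the classic 2^52 trick: adding and then subtracting the bias 0x4330000000000000 (2^52) makes the hardware round the value at the ones place in the current rounding mode. A scalar demonstration, assuming the default round-to-nearest-even mode (the volatile keeps the compiler from folding the arithmetic):

#include <stdio.h>

int main (void)
{
  volatile double bias = 4503599627370496.0;   // 2^52
  volatile double x = 2.5;
  printf("%g\n", (x + bias) - bias);           // prints 2: ties go to even
  return 0;
}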
*/ +#ifndef ___SIMD_MATH_LLROUNDD2_H___ +#define ___SIMD_MATH_LLROUNDD2_H___ + #include #include @@ -35,8 +38,8 @@ // Handles no exception // overflow will return unspecified data -vector signed long long -llroundd2 (vector double in) +static inline vector signed long long +_llroundd2 (vector double in) { int shift0, shift1; vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}; @@ -72,7 +75,7 @@ */ addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24})); addend = spu_rlmask(addend, -7); -// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1})); + // addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1})); mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4)); /* Compute the two's complement of the mantissa if the @@ -84,9 +87,11 @@ mant = spu_xor(mant, sign); borrow = spu_genb(mant, sign); borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){ - 4,5,6,7, 192,192,192,192, - 12,13,14,15, 192,192,192,192})); + 4,5,6,7, 192,192,192,192, + 12,13,14,15, 192,192,192,192})); mant = spu_subx(mant, sign, borrow); return ((vec_llong2)(mant)); } + +#endif diff --git a/Extras/simdmathlibrary/spu/llroundf4.c b/Extras/simdmathlibrary/spu/simdmath/llroundf4.h similarity index 95% rename from Extras/simdmathlibrary/spu/llroundf4.c rename to Extras/simdmathlibrary/spu/simdmath/llroundf4.h index f4f89dae2..77345d869 100644 --- a/Extras/simdmathlibrary/spu/llroundf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/llroundf4.h @@ -28,6 +28,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LLROUNDF4_H___ +#define ___SIMD_MATH_LLROUNDF4_H___ + #include #include @@ -35,8 +38,8 @@ // Handles no exception // overflow will return unspecified data -llroundf4_t -llroundf4 (vector float in) +static inline llroundf4_t +_llroundf4 (vector float in) { llroundf4_t res; vec_int4 exp; @@ -90,8 +93,8 @@ addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24})); addend0 = spu_rlmask(addend0, -7); addend1 = spu_rlmask(addend1, -7); -// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1})); -// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1})); + // addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1})); + // addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1})); res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4)); res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4)); @@ -113,3 +116,5 @@ return res; } + +#endif diff --git a/Extras/simdmathlibrary/spu/log10f4.c b/Extras/simdmathlibrary/spu/simdmath/log10f4.h similarity index 62% rename from Extras/simdmathlibrary/spu/log10f4.c rename to Extras/simdmathlibrary/spu/simdmath/log10f4.h index b3ce42112..9f28103f4 100644 --- a/Extras/simdmathlibrary/spu/log10f4.c +++ b/Extras/simdmathlibrary/spu/simdmath/log10f4.h @@ -27,53 +27,57 @@ POSSIBILITY OF SUCH DAMAGE.
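The difference between the two families is the halfway case: _llroundd2 and _llroundf4 extract the bit just below the integer position (the addend shuffles above) and add it, which rounds halfway values away from zero, while the llrint variants inherit the prevailing rounding mode. The scalar counterparts show the contrast (default round-to-nearest-even mode assumed):

#include <math.h>
#include <stdio.h>

int main (void)
{
  printf("%lld %lld\n", llround(2.5), llround(-2.5));  // 3 -3, away from zero
  printf("%lld %lld\n", llrint(2.5),  llrint(-2.5));   // 2 -2, ties to even
  return 0;
}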
*/ +#ifndef ___SIMD_MATH_LOG10F4_H___ +#define ___SIMD_MATH_LOG10F4_H___ + #include #include +#include -#define _LOG10F_H_loga2msb ((float)0.3010299205780f) -#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f) -#define _LOG10F_H_logaemsb ((float)0.4342944622040f) -#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f) -#define _LOG10F_H_logae ((float)0.4342944819033f) +#define __LOG10F_loga2msb 0.3010299205780f +#define __LOG10F_loga2lsb 7.5085978266e-8f +#define __LOG10F_logaemsb 0.4342944622040f +#define __LOG10F_logaelsb 1.9699272335e-8f +#define __LOG10F_logae 0.4342944819033f -#define _LOG10F_H_c0 ((float)(0.2988439998f)) -#define _LOG10F_H_c1 ((float)(0.3997655209f)) -#define _LOG10F_H_c2 ((float)(0.6666679125f)) +#define __LOG10F_c0 0.2988439998f +#define __LOG10F_c1 0.3997655209f +#define __LOG10F_c2 0.6666679125f -vector float -log10f4 (vector float x) +static inline vector float +_log10f4 (vector float x) { vec_int4 zeros = spu_splats((int)0); vec_float4 ones = spu_splats(1.0f); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros); - vec_int4 expmask = spu_splats((int)0x7F800000); - vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); - x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + vec_uint4 expmask = spu_splats(0x7F800000U); + vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 ); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask); - vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x); x = spu_sel(x , spu_add(x, x) , mask); xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); vec_float4 x1 = spu_sub(x , ones); - vec_float4 z = divf4 (x1, spu_add(x, ones)); + vec_float4 z = _divf4 (x1, spu_add(x, ones)); vec_float4 w = spu_mul(z , z); vec_float4 polyw; - polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1)); - polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2)); + polyw = spu_madd(spu_splats(__LOG10F_c0), w, spu_splats(__LOG10F_c1)); + polyw = spu_madd(polyw , w, spu_splats(__LOG10F_c2)); vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); vec_float4 wnew = spu_convtf(xexp,0); - vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1, - spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew)); - vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1, - spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew, - spu_mul(spu_splats(_LOG10F_H_logae), yneg)) + vec_float4 zz1 = spu_madd(spu_splats(__LOG10F_logaemsb), x1, + spu_mul(spu_splats(__LOG10F_loga2msb),wnew)); + vec_float4 zz2 = spu_madd(spu_splats(__LOG10F_logaelsb), x1, + spu_madd(spu_splats(__LOG10F_loga2lsb), wnew, + spu_mul(spu_splats(__LOG10F_logae), yneg)) ); return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); } - +#endif diff --git a/Extras/simdmathlibrary/spu/log1pf4.c b/Extras/simdmathlibrary/spu/simdmath/log1pf4.h similarity index 72% rename from Extras/simdmathlibrary/spu/log1pf4.c rename to Extras/simdmathlibrary/spu/simdmath/log1pf4.h index cab4a959a..6715aa4df 100644 --- a/Extras/simdmathlibrary/spu/log1pf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/log1pf4.h @@ -27,25 +27,34 @@ POSSIBILITY OF SUCH DAMAGE. 
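_log10f4 above uses the usual range reduction: write x = m * 2^e with m near 1, compute ln m from the series in z = (m - 1)/(m + 1), and combine as log10(x) = e*log10(2) + log10(e)*ln(m); the msb/lsb constant pairs carry each factor to slightly better than single precision. The identity itself, checked in scalar C:

#include <math.h>
#include <stdio.h>

int main (void)
{
  double x = 0.37;
  int e;
  double m = frexp(x, &e);      // x = m * 2^e with m in [0.5, 1)
  printf("%.9f\n", e * log10(2.0) + log10(exp(1.0)) * log(m));
  printf("%.9f\n", log10(x));   // same result
  return 0;
}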
*/ +#ifndef ___SIMD_MATH_LOG1PF4_H___ +#define ___SIMD_MATH_LOG1PF4_H___ + #include #include -vector float -log1pf4 (vector float x) + +#include +#include + +static inline vector float +_log1pf4 (vector float x) { - vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)), - spu_cmpgt(spu_splats(0.5f), x)); + vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(-0.5f)), + spu_cmpgt(spu_splats(0.5f), x)); vec_float4 x2 = spu_mul(x,x); vec_float4 d0, d1, n0, n1; - d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f)); - d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f)); + d0 = spu_madd(x , spu_splats(1.5934420741f), spu_splats(0.8952856868f)); + d1 = spu_madd(x , spu_splats(0.1198195734f), spu_splats(0.8377145063f)); d1 = spu_madd(x2, d1, d0); - n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f)); - n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f)); + n0 = spu_madd(x , spu_splats(1.1457993413f), spu_splats(0.8952856678f)); + n1 = spu_madd(x , spu_splats(0.0082862580f), spu_splats(0.3394238808f)); n1 = spu_madd(x2, n1, n0); - return spu_sel(logf4(spu_add(x, spu_splats(1.0f))), - spu_mul(x, divf4(n1, d1)), + return spu_sel(_logf4(spu_add(x, spu_splats(1.0f))), + spu_mul(x, _divf4(n1, d1)), nearzeromask); } + +#endif diff --git a/Extras/simdmathlibrary/spu/log2f4.c b/Extras/simdmathlibrary/spu/simdmath/log2f4.h similarity index 72% rename from Extras/simdmathlibrary/spu/log2f4.c rename to Extras/simdmathlibrary/spu/simdmath/log2f4.h index 336d2b8a0..43d2dfc6f 100644 --- a/Extras/simdmathlibrary/spu/log2f4.c +++ b/Extras/simdmathlibrary/spu/simdmath/log2f4.h @@ -27,45 +27,52 @@ POSSIBILITY OF SUCH DAMAGE. 
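The near-zero branch in _log1pf4 above exists because forming 1 + x first discards the low bits of a small x, so log(1 + x) loses relative accuracy exactly where log1p is most used; the rational approximation avoids ever computing 1 + x. A scalar illustration of the cancellation:

#include <math.h>
#include <stdio.h>

int main (void)
{
  double x = 1e-12;
  printf("%.17g\n", log(1.0 + x));  // off by roughly 1e-4 relative error
  printf("%.17g\n", log1p(x));      // accurate to full precision
  return 0;
}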
*/ +#ifndef ___SIMD_MATH_LOG2F4_H___ +#define ___SIMD_MATH_LOG2F4_H___ + #include #include -#define _LOG2F_H_l2emsb ((float)1.4426950216293f) -#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f) -#define _LOG2F_H_l2e ((float)1.4426950408890f) +#include -#define _LOG2F_H_c0 ((float)(0.2988439998f)) -#define _LOG2F_H_c1 ((float)(0.3997655209f)) -#define _LOG2F_H_c2 ((float)(0.6666679125f)) +#define __LOG2F_l2emsb 1.4426950216293f +#define __LOG2F_l2elsb 1.9259629911e-8f +#define __LOG2F_l2e 1.4426950408890f -vector float -log2f4 (vector float x) +#define __LOG2F_c0 0.2988439998f +#define __LOG2F_c1 0.3997655209f +#define __LOG2F_c2 0.6666679125f + +static inline vector float +_log2f4 (vector float x) { vec_int4 zeros = spu_splats((int)0); vec_float4 ones = spu_splats(1.0f); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros); vec_int4 expmask = spu_splats((int)0x7F800000); vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); - x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uint4)expmask); - vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x); x = spu_sel(x , spu_add(x, x) , mask); xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); vec_float4 x1 = spu_sub(x , ones); - vec_float4 z = divf4(x1, spu_add(x, ones)); + vec_float4 z = _divf4(x1, spu_add(x, ones)); vec_float4 w = spu_mul(z , z); vec_float4 polyw; - polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1)); - polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2)); + polyw = spu_madd(spu_splats(__LOG2F_c0), w, spu_splats(__LOG2F_c1)); + polyw = spu_madd(polyw , w, spu_splats(__LOG2F_c2)); vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); - vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0)); - vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1, - spu_mul(spu_splats(_LOG2F_H_l2e), yneg) + vec_float4 zz1 = spu_madd(spu_splats(__LOG2F_l2emsb), x1, spu_convtf(xexp,0)); + vec_float4 zz2 = spu_madd(spu_splats(__LOG2F_l2elsb), x1, + spu_mul(spu_splats(__LOG2F_l2e), yneg) ); return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/logbd2.h b/Extras/simdmathlibrary/spu/simdmath/logbd2.h new file mode 100644 index 000000000..55bfdc8e8 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/logbd2.h @@ -0,0 +1,86 @@ +/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_LOGBD2_H___ +#define ___SIMD_MATH_LOGBD2_H___ + +#include +#include +#include + +static inline vector double +_logbd2 (vector double x) +{ + vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; + vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f }; + vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b }; + + vec_ullong2 sign = spu_splats(0x8000000000000000ull); + vec_ullong2 expn = spu_splats(0x7ff0000000000000ull); + vec_ullong2 zero = spu_splats(0x0000000000000000ull); + + vec_ullong2 isnan, isinf, iszero; + vec_double2 logb = (vec_double2)zero; + vec_llong2 e1, e2; + vec_uint4 cmpgt, cmpeq, cmpzr; + vec_int4 lz, lz0, lz1; + + //NaN: x is NaN (all-ones exponent and non-zero mantissa) + cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) ); + isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ), + spu_and( spu_shuffle( cmpeq, cmpeq, even ), + spu_shuffle( cmpgt, cmpgt, odd ) ) ); + logb = spu_sel( logb, (vec_double2)spu_splats(0x7FF8000000000000ll), isnan ); + + //+HUGE_VAL: x is infinite (all-ones exponent and zero mantissa) + isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ); + logb = spu_sel( logb, (vec_double2)spu_splats(__builtin_huge_val()), isinf ); + + //-HUGE_VAL: x is zero (zero exponent and zero mantissa) + cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero ); + iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ); + logb = spu_sel( logb, (vec_double2)spu_splats(-__builtin_huge_val()), iszero ); + + //Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x + e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn ); + e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20); + + lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) ); + lz0 = (vec_int4)spu_shuffle( lz, lz, even ); + lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) ); + lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) ); + + logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ), + spu_nor( isnan, spu_or( isinf, iszero ) ) ); + + return logb; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/logbf4.c b/Extras/simdmathlibrary/spu/simdmath/logbf4.h similarity index 92% rename from Extras/simdmathlibrary/spu/logbf4.c rename to
Extras/simdmathlibrary/spu/simdmath/logbf4.h index 85662a416..40e0d7a71 100644 --- a/Extras/simdmathlibrary/spu/logbf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/logbf4.h @@ -27,20 +27,20 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LOGBF4_H___ +#define ___SIMD_MATH_LOGBF4_H___ + #include #include #include -#ifndef HUGE_VALF -#define HUGE_VALF __builtin_huge_valf () -#endif - -vector float -logbf4 (vector float x) +static inline vector float +_logbf4 (vector float x) { vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000)); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0); + vec_uint4 zeromask = spu_cmpeq(e1, 0); e1 = spu_sub(e1, spu_splats((int)0x3F800000)); return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask); } +#endif diff --git a/Extras/simdmathlibrary/spu/logf4.c b/Extras/simdmathlibrary/spu/simdmath/logf4.h similarity index 69% rename from Extras/simdmathlibrary/spu/logf4.c rename to Extras/simdmathlibrary/spu/simdmath/logf4.h index 6e7f03d27..2b901a544 100644 --- a/Extras/simdmathlibrary/spu/logf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/logf4.h @@ -27,44 +27,50 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_LOGF4_H___ +#define ___SIMD_MATH_LOGF4_H___ + #include #include +#include -#define _LOGF_H_ln2msb ((float)(0.6931470632553f)) -#define _LOGF_H_ln2lsb ((float)(1.1730463525e-7f)) +#define __LOGF_ln2msb 0.6931470632553f +#define __LOGF_ln2lsb 1.1730463525e-7f -#define _LOGF_H_c0 ((float)(0.2988439998f)) -#define _LOGF_H_c1 ((float)(0.3997655209f)) -#define _LOGF_H_c2 ((float)(0.6666679125f)) +#define __LOGF_c0 0.2988439998f +#define __LOGF_c1 0.3997655209f +#define __LOGF_c2 0.6666679125f -vector float -logf4 (vector float x) +static inline vector float +_logf4 (vector float x) { vec_int4 zeros = spu_splats((int)0); vec_float4 ones = spu_splats(1.0f); - vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros); + vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros); - vec_int4 expmask = spu_splats((int)0x7F800000); - vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 ); - x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask); + vec_uint4 expmask = spu_splats(0x7F800000U); + vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 ); + x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask); - vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x); + vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x); x = spu_sel(x , spu_add(x, x) , mask); xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask); vec_float4 x1 = spu_sub(x , ones); - vec_float4 z = divf4 (x1, spu_add(x, ones)); + vec_float4 z = _divf4 (x1, spu_add(x, ones)); vec_float4 w = spu_mul(z , z); vec_float4 polyw; - polyw = spu_madd(spu_splats(_LOGF_H_c0), w, spu_splats(_LOGF_H_c1)); - polyw = spu_madd(polyw , w, spu_splats(_LOGF_H_c2)); + polyw = spu_madd(spu_splats(__LOGF_c0), w, spu_splats(__LOGF_c1)); + polyw = spu_madd(polyw , w, spu_splats(__LOGF_c2)); vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1)); vec_float4 wnew = spu_convtf(xexp,0); - vec_float4 zz1 = spu_madd(spu_splats(_LOGF_H_ln2msb), wnew, x1); - vec_float4 zz2 = spu_madd(spu_splats(_LOGF_H_ln2lsb), wnew, yneg); + vec_float4 zz1 = spu_madd(spu_splats(__LOGF_ln2msb), wnew, x1); + vec_float4 zz2 = spu_madd(spu_splats(__LOGF_ln2lsb), wnew, yneg); return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask); } + +#endif diff --git 
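The `__LOGF_ln2msb`/`__LOGF_ln2lsb` pair above (and the matching `__LOG2F_l2emsb`/`__LOG2F_l2elsb` pair in log2f4.h) is a Cody-Waite style constant split: the exponent contribution is multiplied by the high bits of ln 2, which is nearly exact, and the small residual is folded in with the other low-order terms. A scalar sketch of the final recombination, mirroring the zz1/zz2 lines of `_logf4` (illustrative only):

    static float log_recombine_sketch(int exponent, float x1, float yneg)
    {
      const float ln2msb = 0.6931470632553f;  /* high bits of ln 2 */
      const float ln2lsb = 1.1730463525e-7f;  /* ln 2 minus ln2msb */
      float w   = (float)exponent;
      float zz1 = ln2msb * w + x1;            /* dominant term, low error */
      float zz2 = ln2lsb * w + yneg;          /* residual plus polynomial tail */
      return zz1 + zz2;
    }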
a/Extras/simdmathlibrary/spu/modfd2.c b/Extras/simdmathlibrary/spu/simdmath/modfd2.h similarity index 72% rename from Extras/simdmathlibrary/spu/modfd2.c rename to Extras/simdmathlibrary/spu/simdmath/modfd2.h index 5c10df205..cb9f52501 100644 --- a/Extras/simdmathlibrary/spu/modfd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/modfd2.h @@ -27,28 +27,33 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_MODFD2_H___ +#define ___SIMD_MATH_MODFD2_H___ + #include #include +#include // Returns fractional part and stores integral part in *iptr. -vector double -modfd2 (vector double x, vector double *iptr) +static inline vector double +_modfd2 (vector double x, vector double *iptr) { - vec_double2 integral, fraction; - vec_uint4 iszero; - vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0}; - vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}; + vec_double2 integral, fraction; + vec_uint4 iszero; + vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0}; + vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}; - integral = truncd2( x ); + integral = _truncd2( x ); - // if integral is zero, then fraction is x. - iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0); - iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern)); - fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero); + // if integral is zero, then fraction is x. + iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0); + iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern)); + fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero); - *iptr = integral; - return fraction; + *iptr = integral; + return fraction; } +#endif diff --git a/Extras/simdmathlibrary/spu/modff4.c b/Extras/simdmathlibrary/spu/simdmath/modff4.h similarity index 85% rename from Extras/simdmathlibrary/spu/modff4.c rename to Extras/simdmathlibrary/spu/simdmath/modff4.h index 3b28242c4..e18cb5967 100644 --- a/Extras/simdmathlibrary/spu/modff4.c +++ b/Extras/simdmathlibrary/spu/simdmath/modff4.h @@ -27,21 +27,26 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_MODFF4_H___ +#define ___SIMD_MATH_MODFF4_H___ + #include #include +#include // Returns fractional part and stores integral part in *iptr. -vector float -modff4 (vector float x, vector float *iptr) +static inline vector float +_modff4 (vector float x, vector float *iptr) { - vec_float4 integral, fraction; + vec_float4 integral, fraction; - integral = truncf4( x ); - fraction = spu_sub( x, integral ); + integral = _truncf4( x ); + fraction = spu_sub( x, integral ); - *iptr = integral; - return fraction; + *iptr = integral; + return fraction; } +#endif diff --git a/Extras/simdmathlibrary/spu/nearbyintd2.c b/Extras/simdmathlibrary/spu/simdmath/nearbyintd2.h similarity index 95% rename from Extras/simdmathlibrary/spu/nearbyintd2.c rename to Extras/simdmathlibrary/spu/simdmath/nearbyintd2.h index ac5f90755..111bb01b4 100644 --- a/Extras/simdmathlibrary/spu/nearbyintd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/nearbyintd2.h @@ -28,11 +28,14 @@ POSSIBILITY OF SUCH DAMAGE. 
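The modf pair above is trunc-and-subtract, with one subtlety in the double version: when the integral part is zero, the fraction is taken to be x itself rather than x - integral, so the sign of a negative-zero input survives. A scalar sketch of why that select is needed (illustrative only):

    #include <math.h>

    static double modf_sketch(double x, double *iptr)
    {
      double integral = trunc(x);
      /* for x == -0.0, x - integral would yield +0.0 and lose the sign
         bit; returning x itself keeps it, which is what the iszero
         select in _modfd2 does */
      double fraction = (integral == 0.0) ? x : (x - integral);
      *iptr = integral;
      return fraction;
    }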
*/ +#ifndef ___SIMD_MATH_NEARBYINTD2_H___ +#define ___SIMD_MATH_NEARBYINTD2_H___ + #include #include -vector double -nearbyintd2(vector double in) +static inline vector double +_nearbyintd2(vector double in) { vec_uint4 fpscr; vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL}); @@ -69,3 +72,5 @@ nearbyintd2(vector double in) return (out); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/nearbyintf4.h b/Extras/simdmathlibrary/spu/simdmath/nearbyintf4.h new file mode 100644 index 000000000..99d31dfe7 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/nearbyintf4.h @@ -0,0 +1,55 @@ +/* nearbyintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding model, + without raising an inexact floating-point exception. + On SPU, the rounding mode for float is always towards zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_NEARBYINTF4_H___ +#define ___SIMD_MATH_NEARBYINTF4_H___ + +#include +#include + +static inline vector float +_nearbyintf4(vector float x) +{ + vector signed int xi; + vector unsigned int inrange; + + // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). + // However, no truncation needed if exponent > 22. + + inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + + return spu_sel( x, spu_convtf( xi, 0 ), inrange ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/negated2.h b/Extras/simdmathlibrary/spu/simdmath/negated2.h new file mode 100644 index 000000000..19c90287f --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/negated2.h @@ -0,0 +1,42 @@ +/* negated2 - for each of two double slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. 
+ + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_NEGATED2_H___ +#define ___SIMD_MATH_NEGATED2_H___ + +#include +#include + +static inline vector double +_negated2 (vector double x) +{ + return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/negatef4.h b/Extras/simdmathlibrary/spu/simdmath/negatef4.h new file mode 100644 index 000000000..52dd3a1bb --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/negatef4.h @@ -0,0 +1,43 @@ +/* negatef4 - for each of four float slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_NEGATEF4_H___ +#define ___SIMD_MATH_NEGATEF4_H___ + +#include +#include + + +static inline vector float +_negatef4 (vector float x) +{ + return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/negatei4.c b/Extras/simdmathlibrary/spu/simdmath/negatei4.h similarity index 92% rename from Extras/simdmathlibrary/spu/negatei4.c rename to Extras/simdmathlibrary/spu/simdmath/negatei4.h index f74232a39..3b29c50ba 100644 --- a/Extras/simdmathlibrary/spu/negatei4.c +++ b/Extras/simdmathlibrary/spu/simdmath/negatei4.h @@ -27,13 +27,17 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_NEGATEI4_H___ +#define ___SIMD_MATH_NEGATEI4_H___ + #include #include -vector signed int -negatei4 (vector signed int x) +static inline vector signed int +_negatei4 (vector signed int x) { vector signed int zero = (vector signed int){0,0,0,0}; return spu_sub (zero, x); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/negatell2.h b/Extras/simdmathlibrary/spu/simdmath/negatell2.h new file mode 100644 index 000000000..b618e11da --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/negatell2.h @@ -0,0 +1,47 @@ +/* negatell2 - for each of 2 signed long long slots, negate the sign bit. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
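Two negation styles appear in these hunks: the float/double variants flip the IEEE sign bit with an XOR, which also works uniformly on NaN and infinity, while `_negatei4` does arithmetic 0 - x. A scalar sketch of both (illustrative only):

    #include <stdint.h>
    #include <string.h>

    static float negate_float_sketch(float x)
    {
      uint32_t bits;
      memcpy(&bits, &x, sizeof bits);
      bits ^= 0x80000000u;          /* flip the sign bit only */
      memcpy(&x, &bits, sizeof x);
      return x;
    }

    static int negate_int_sketch(int x)
    {
      return 0 - x;                 /* two's complement negation; wraps at INT_MIN,
                                       as the vector subtract does */
    }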
+ */ + +#ifndef ___SIMD_MATH_NEGATELL2_H___ +#define ___SIMD_MATH_NEGATELL2_H___ + +#include +#include + +static inline vector signed long long +_negatell2 (vector signed long long x) +{ + vector signed int zero = (vector signed int){0,0,0,0}; + vector signed int borrow; + + borrow = spu_genb(zero, (vec_int4)x); + borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0})); + return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/nextafterd2.h b/Extras/simdmathlibrary/spu/simdmath/nextafterd2.h new file mode 100644 index 000000000..2fcd98f54 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/nextafterd2.h @@ -0,0 +1,97 @@ +/* nextafterd2 - find next representable floating-point value towards 2nd param. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
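`_negatell2` above has no 64-bit subtract to lean on, so it builds one from 32-bit pieces: subtract the low words first, then propagate the borrow into the high words, which is what the spu_genb / shuffle / spu_subx sequence does. The equivalent two-word arithmetic in scalar C (illustrative only):

    #include <stdint.h>

    static void negate64_sketch(uint32_t hi, uint32_t lo,
                                uint32_t *rhi, uint32_t *rlo)
    {
      uint32_t borrow = (lo != 0);  /* 0 - lo borrows unless lo == 0 */
      *rlo = 0u - lo;
      *rhi = 0u - hi - borrow;
    }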
+ */ + +#ifndef ___SIMD_MATH_NEXTAFTERD2_H___ +#define ___SIMD_MATH_NEXTAFTERD2_H___ + +#include +#include + +static inline vector double +_nextafterd2 (vector double xx, vector double yy) +{ + vec_uint4 abs_x, abs_y, sign_x, abs_dif; + vec_uint4 is_sub, is_zerox, is_zeroy; + vec_uint4 is_equal, is_infy, is_nany; + vec_uint4 res0, res1, res; + vec_uint4 vec_zero = ((vec_uint4){0,0,0,0}); + vec_uint4 vec_one = ((vec_uint4){0,1,0,1}); + vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1}); + vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0}); + vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}); + vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}); + + // mask sign bit + abs_x = spu_and( (vec_uint4)xx, msk_abs); + abs_y = spu_and( (vec_uint4)yy, msk_abs); + + is_zerox = spu_cmpeq( abs_x, vec_zero); + is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq)); + + // -0 exception + sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0})); + sign_x = spu_sel(sign_x, vec_zero, is_zerox); + + // if same sign |y| < |x| -> decrease + abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4)); + is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease + is_sub = spu_or(is_sub, abs_dif); + is_sub = spu_rlmaska(is_sub, -31); + is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8})); + + res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase + res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease + res = spu_sel( res0, res1, is_sub); // select increase or decrease + res = spu_or( res, sign_x); // set sign + + // check exception + // 0 -> -1 + res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub)); + + // check equal (include 0,-0) + is_zeroy = spu_cmpeq( abs_y, vec_zero); + is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq)); + is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy); + is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq)); + is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox)); + res = spu_sel(res, (vec_uint4)yy, is_equal); + + // check nan + is_infy = spu_cmpeq( abs_y, msk_exp); + is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq)); + is_nany = spu_and( abs_y, msk_exp); + is_nany = spu_cmpeq( is_nany, msk_exp); + is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq)); + is_nany = spu_sel( is_nany, vec_zero, is_infy); + res = spu_sel(res, (vec_uint4)yy, is_nany); + + return (vec_double2)res; +} + +#endif diff --git a/Extras/simdmathlibrary/spu/nextafterf4.c b/Extras/simdmathlibrary/spu/simdmath/nextafterf4.h similarity index 91% rename from Extras/simdmathlibrary/spu/nextafterf4.c rename to Extras/simdmathlibrary/spu/simdmath/nextafterf4.h index 3807d48d4..cf32a4fcd 100644 --- a/Extras/simdmathlibrary/spu/nextafterf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/nextafterf4.h @@ -29,11 +29,15 @@ POSSIBILITY OF SUCH DAMAGE. 
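The nextafter implementations above exploit the fact that, for finite IEEE values of one sign, the integer reading of the bit pattern is monotonic in the float value, so the next representable value is one added to or subtracted from the bit pattern (a two-word add/subtract for double), plus special cases for zero, equality, and NaN. A scalar float sketch of the same case analysis (illustrative only, not the library's exact code path):

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static float nextafterf_sketch(float x, float y)
    {
      uint32_t ux;
      if (isnan(x) || isnan(y)) return x + y;  /* propagate NaN */
      if (x == y) return y;                    /* also catches 0 == -0 */
      if (x == 0.0f)                           /* step to smallest denormal */
        return (y > 0.0f) ? 1.4e-45f : -1.4e-45f;
      memcpy(&ux, &x, sizeof ux);
      if ((x < y) == (x > 0.0f))
        ux += 1;                               /* step away from zero */
      else
        ux -= 1;                               /* step toward zero */
      memcpy(&x, &ux, sizeof x);
      return x;
    }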
*/ +#ifndef ___SIMD_MATH_NEXTAFTERF4_H___ +#define ___SIMD_MATH_NEXTAFTERF4_H___ + #include #include -vector float nextafterf4(vector float x, vector float y) +static inline vector float +_nextafterf4(vector float x, vector float y) { vec_float4 x_not_dec, lala_inc, lala_dec; vec_uint4 abs_inc_number, abs_dec_number; @@ -41,22 +45,22 @@ vector float nextafterf4(vector float x, vector float y) vec_uint4 A, B; //abs_inc, abs_dec - abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats((float)0x0))); - abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats((float)0x0)); + abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats(0.0f))); + abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats(0.0f)); //x<= y A= spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000)); // in < 0 B= abs_dec_number; - lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats((float)0x0), x)); + lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats(0.0f), x)); // in <=0, abs_inc ( if in==0, set result's sign to -) //A= spu_or(spu_splats((unsigned int)0x80000000), spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000))); A= spu_or(abs_inc_number, spu_splats((unsigned int)0x80000000)); // in > 0 B = abs_dec_number; - lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats((float)0x0))); + lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats(0.0f))); x_not_dec = spu_sel(y, lala_inc, spu_cmpgt(y, x)); @@ -64,3 +68,5 @@ vector float nextafterf4(vector float x, vector float y) // (x <= y) || (x > y) return spu_sel(x_not_dec, lala_dec, spu_cmpgt(x, y)); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/powf4.h b/Extras/simdmathlibrary/spu/simdmath/powf4.h new file mode 100644 index 000000000..aacb61ee6 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/powf4.h @@ -0,0 +1,60 @@ +/* powf4 - + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_POWF4_H___ +#define ___SIMD_MATH_POWF4_H___ + +#include +#include + +#include +#include + +static inline vector float +_powf4 (vector float x, vector float y) +{ + vec_int4 zeros = spu_splats((int)0); + vec_uint4 zeromask = spu_cmpeq((vec_float4)zeros, x); + + vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x); + + vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000); + vec_float4 absx = spu_andc(x, sbit); + vec_float4 absy = spu_andc(y, sbit); + vec_uint4 oddy = spu_and(spu_convtu(absy, 0), spu_splats(0x00000001U)); + negmask = spu_and(negmask, spu_cmpgt(oddy, (vec_uint4)zeros)); + + vec_float4 res = _exp2f4(spu_mul(y, _log2f4(absx))); + res = spu_sel(res, spu_or(sbit, res), negmask); + + + return spu_sel(res, (vec_float4)zeros, zeromask); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/recipd2.c b/Extras/simdmathlibrary/spu/simdmath/recipd2.h similarity index 55% rename from Extras/simdmathlibrary/spu/recipd2.c rename to Extras/simdmathlibrary/spu/simdmath/recipd2.h index 3a6ef771b..906ca336f 100644 --- a/Extras/simdmathlibrary/spu/recipd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/recipd2.h @@ -27,54 +27,62 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_RECIPD2_H___ +#define ___SIMD_MATH_RECIPD2_H___ + #include #include +#include +#include +#include + // Handles exceptional values as follows: // NaN -> NaN // (+,-)Inf -> (+,-)0 // (+,-)0 -> (+,-)Inf // Denormal inputs are treated as zero. -vector double -recipd2 (vector double x) +static inline vector double +_recipd2 (vector double x) { - vec_ullong2 expmask, signmask; - vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result; - vec_float4 onef, manf, y0f, y1f; + vec_ullong2 expmask, signmask; + vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result; + vec_float4 onef, manf, y0f, y1f; - expmask = spu_splats(0x7ff0000000000000ull); - signmask = spu_splats(0x8000000000000000ull); - onef = spu_splats(1.0f); - one = spu_extend( onef ); + expmask = spu_splats(0x7ff0000000000000ull); + signmask = spu_splats(0x8000000000000000ull); + onef = spu_splats(1.0f); + one = spu_extend( onef ); - // Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ). - // Invert exponent part with subtraction. + // Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ). + // Invert exponent part with subtraction. - exp = spu_and( x, (vec_double2)expmask ); - nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp ); - man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask ); + exp = spu_and( x, (vec_double2)expmask ); + nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp ); + man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask ); - // Compute mantissa part with single and double precision Newton-Raphson steps. - // Then multiply with 2^(1-exponent). + // Compute mantissa part with single and double precision Newton-Raphson steps. + // Then multiply with 2^(1-exponent). 
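An aside on the iteration the recipd2 comment above describes: the Newton-Raphson step for a reciprocal is y' = y + y*(1 - m*y), which roughly doubles the number of correct bits per step, so one single-precision step on the spu_re estimate followed by two double-precision steps reaches full double accuracy. Scalar form (illustrative only):

    static double recip_nr_step_sketch(double m, double y)
    {
      /* y approximates 1/m; the residual (1 - m*y) is the relative
         error, and feeding it back squares that error */
      return y + y * (1.0 - m * y);
    }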
- manf = spu_roundtf( man ); - y0f = spu_re( manf ); - y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f ); - y1 = spu_extend( y1f ); - y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 ); - y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 ); - y3 = spu_mul( y3, nexp ); + manf = spu_roundtf( man ); + y0f = spu_re( manf ); + y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f ); + y1 = spu_extend( y1f ); + y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 ); + y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 ); + y3 = spu_mul( y3, nexp ); - // Choose iterated result or special value. + // Choose iterated result or special value. - zero = spu_and( x, (vec_double2)signmask ); - inf = spu_sel( (vec_double2)expmask, x, signmask ); + zero = spu_and( x, (vec_double2)signmask ); + inf = spu_sel( (vec_double2)expmask, x, signmask ); - result = spu_sel( y3, zero, isinfd2 ( x ) ); - result = spu_sel( result, inf, is0denormd2 ( x ) ); - result = spu_sel( result, x, isnand2( x ) ); + result = spu_sel( y3, zero, _isinfd2 ( x ) ); + result = spu_sel( result, inf, _is0denormd2 ( x ) ); + result = spu_sel( result, x, _isnand2( x ) ); - return result; + return result; } +#endif diff --git a/Extras/simdmathlibrary/spu/recipf4.c b/Extras/simdmathlibrary/spu/simdmath/recipf4.h similarity index 77% rename from Extras/simdmathlibrary/spu/recipf4.c rename to Extras/simdmathlibrary/spu/simdmath/recipf4.h index c0337414d..e7dfd8651 100644 --- a/Extras/simdmathlibrary/spu/recipf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/recipf4.h @@ -27,19 +27,24 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_RECIPF4_H___ +#define ___SIMD_MATH_RECIPF4_H___ + #include #include -vector float recipf4 (vector float x) +static inline vector float +_recipf4 (vector float x) { - // Reciprocal estimate and 1 Newton-Raphson iteration. - // A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact - // answers for powers of 2, and a slightly smaller relative error bound. + // Reciprocal estimate and 1 Newton-Raphson iteration. + // A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact + // answers for powers of 2, and a slightly smaller relative error bound. - vec_float4 y0; - vec_float4 oneish = (vec_float4)spu_splats(0x3f800001); + vec_float4 y0; + vec_float4 oneish = (vec_float4)spu_splats(0x3f800001); - y0 = spu_re( x ); - return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 ); + y0 = spu_re( x ); + return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 ); } +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/remainderd2.h b/Extras/simdmathlibrary/spu/simdmath/remainderd2.h new file mode 100644 index 000000000..3bdc67ee1 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/remainderd2.h @@ -0,0 +1,110 @@ +/* A vector double is returned that contains the remainder xi REM yi, + for the corresponding elements of vector double x and vector double y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_REMAINDERD2_H___ +#define ___SIMD_MATH_REMAINDERD2_H___ + +#include +#include + +#include +#include + + +static inline vector double +_remainderd2(vector double x, vector double yy) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_uint4 y_hi; + vec_uint4 abs_x, abs_yy, abs_2x, abs_2y; + vec_uint4 bias; + vec_uint4 nan_out, overflow; + vec_uint4 result; + vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); + vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); + vec_uint4 vec_zero = spu_splats((unsigned int)0); + vec_uint4 is_zeroy; + + // cut sign + abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_yy = spu_andc((vec_uint4)yy, sign_mask); + y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); + + + // check inputs that must produce NaN + is_zeroy = spu_cmpeq(abs_yy, vec_zero); + is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); + nan_out = __vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 + nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 + nan_out = spu_or(nan_out, is_zeroy); // y = 0 + nan_out = spu_shuffle(nan_out, nan_out, splat_hi); + + + // build 2*y + abs_2y = __rem_twice_d(abs_yy); // 2 x y + + result = (vec_uint4)_fmodd2((vec_double2)abs_x, (vec_double2)abs_2y); + + // abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF))); + + /* if (2*x > y) + * x -= y + * if (2*x >= y) x -= y + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); + // build 2*x + abs_2x = __rem_twice_d(abs_x); // 2 x x + + bias = __vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy + bias = spu_andc(bias, overflow); + + abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias); + + + overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // went negative + + // build 2*x + abs_2x = __rem_twice_d(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; the doubling helper needs a non-negative input) + bias = spu_andc(bias, spu_rlmaska(__rem_sub_d(abs_2x, abs_yy), -31)); + bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); + abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias); + + /* select final answer + */ + result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign + result = spu_sel(result, val_nan, nan_out); // NaN cases + + return
((vec_double2)result); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/remainderf4.h b/Extras/simdmathlibrary/spu/simdmath/remainderf4.h new file mode 100644 index 000000000..32d5cfaee --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/remainderf4.h @@ -0,0 +1,115 @@ +/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_REMAINDERF4_H___ +#define ___SIMD_MATH_REMAINDERF4_H___ + +#include +#include + +#include +#include +#include + +// +// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero. +// If x == 0, the result is 0. +// If x != 0 and y == 0, the result is undefined. +static inline vector float +_remainderf4 (vector float x, vector float y) +{ + vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf; + vec_int4 qi0, qi1, qi2; + vec_float4 i0, i1, i2, i, rem; + vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd; + + // Find i = truncated_integer(|x/y|) + + // By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0. + // Thus the exact truncation is either the truncated quotient, one less, or one greater. + + q = _divf4( x, y ); + xabs = _fabsf4( x ); + yabs = _fabsf4( y ); + qabs = _fabsf4( q ); + xabs2 = spu_add( xabs, xabs ); + + inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q ); + inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) ); + + qi1 = spu_convts( qabs, 0 ); + qi0 = spu_add( qi1, -1 ); + qi2 = spu_add( qi1, 1 ); + + odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 ); + odd0 = odd2 = spu_nor( odd1, odd1 ); + + i0 = spu_convtf( qi0, 0 ); + i1 = spu_convtf( qi1, 0 ); + i2 = spu_convtf( qi2, 0 ); + + // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as + // 2*|x| - i*|y| >= |x|: + // + // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero. 
+ // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed + // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128. + + cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) ); + cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) ); + + i = i0; + i = spu_sel( i1, i, cmp1 ); + i = spu_sel( i2, i, cmp2 ); + + odd = odd0; + odd = spu_sel( odd1, odd, cmp1 ); + odd = spu_sel( odd2, odd, cmp2 ); + + rem = spu_nmsub( i, yabs, xabs ); + + // Test whether i or i+1 = nearest_integer(|x/y|) + // + // i+1 is correct if: + // + // rem > 0.5*|y| + // or + // rem = 0.5*|y| and i is odd + + yabshalf = spu_mul( yabs, spu_splats(0.5f) ); + cmp1 = spu_cmpgt( rem, yabshalf ); + cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd ); + + i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) ); + i = _copysignf4( i, q ); + + return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/remquod2.h b/Extras/simdmathlibrary/spu/simdmath/remquod2.h new file mode 100644 index 000000000..ef1ea7810 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/remquod2.h @@ -0,0 +1,155 @@ +/* remquod2 - for each of two double slots, compute the remainder and the low bits of the integral quotient. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_REMQUOD2_H___ +#define ___SIMD_MATH_REMQUOD2_H___ + +#include +#include + +#include +#include + +/* + * This function returns the same vector double result as remainderd2(). + * In addition, a vector signed long long is stored in *quo + * that contains the corresponding element values whose sign is + * the sign of xi / yi and whose magnitude is congruent modulo 2^n to + * the magnitude of the integral quotient of xi / yi, where n is + * an implementation-defined integer greater than or equal to 3.
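As the comment says, the result matches remainderd2() and the stored quotient carries only the sign and the low bits of the integral quotient of x/y. In scalar terms the contract is the standard remquo one; a sketch, assuming |x/y| fits in a long long and the rounding mode is round-to-nearest (illustrative only):

    #include <math.h>

    static double remquo_sketch(double x, double y, int *quo)
    {
      double n = nearbyint(x / y);  /* nearest integer, ties to even */
      long long ni = (long long)n;
      *quo = (int)(ni % 8);         /* low 3 bits, sign of x/y kept */
      return x - n * y;             /* illustrative; the hunk below works on
                                       bit patterns to avoid the intermediate
                                       rounding of n * y */
    }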
+ */ + +static inline vector double +_remquod2(vector double x, vector double yy, vector signed long long *quo) +{ + vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); + vec_int4 quotient, quotient0; + vec_uint4 y_hi; + vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y; + vec_uint4 bias; + vec_uint4 nan_out, not_ge, quo_pos, overflow; + vec_uint4 result; + vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF); + vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL)); + vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL)); + vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL)); + vec_uint4 vec_zero = spu_splats((unsigned int)0); + vec_uint4 is_zeroy; + + // cut sign + abs_x = spu_andc((vec_uint4)x, sign_mask); + abs_yy = spu_andc((vec_uint4)yy, sign_mask); + y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi); + + quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1); + quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi); + + // check inputs that must produce NaN + is_zeroy = spu_cmpeq(abs_yy, vec_zero); + is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4)); + nan_out = __vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000 + nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000 + nan_out = spu_or(nan_out, is_zeroy); // y = 0 + nan_out = spu_shuffle(nan_out, nan_out, splat_hi); + + + // build 2*y, 4*y, 8*y + abs_2y = __rem_twice_d(abs_yy); // 2 x y + abs_4y = __rem_twice_d(abs_2y); // 4 x y + abs_8y = __rem_twice_d(abs_4y); // 8 x y + + result = (vec_uint4)_fmodd2((vec_double2)abs_x, (vec_double2)abs_8y); + + // if 8*y would overflow the exponent (y_hi > 0x7FBFFFFF), skip the fmod result and keep x + // abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF))); + + /* if (x >= 4*y) + * x -= 4*y + * quotient = 4 + * else + * quotient = 0 + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF)); + + not_ge = __vec_gt64(abs_4y, abs_x); + not_ge = spu_or(not_ge, overflow); + abs_x = spu_sel(__rem_sub_d(abs_x, abs_4y), abs_x, not_ge); + quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge); + + /* if (x >= 2*y) + * x -= 2*y + * quotient += 2 + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF)); + + not_ge = __vec_gt64(abs_2y, abs_x); // abs_2y > abs_x + not_ge = spu_or(not_ge, overflow); + + abs_x = spu_sel(__rem_sub_d(abs_x, abs_2y), abs_x, not_ge); + quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge); + + /* if (2*x > y) + * x -= y + * if (2*x >= y) x -= y + */ + overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)); + // build 2*x + abs_2x = __rem_twice_d(abs_x); // 2 x x + + bias = __vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy + bias = spu_andc(bias, overflow); + + abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias); + quotient = spu_sub(quotient, (vec_int4)bias); + + overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // went negative + + // build 2*x + abs_2x = __rem_twice_d(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; the doubling helper needs a non-negative input) + bias = spu_andc(bias, spu_rlmaska(__rem_sub_d(abs_2x, abs_yy), -31)); + bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow); + abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias); + quotient = spu_sub(quotient, (vec_int4)bias); + + /* select final answer + */ + result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign + result = spu_sel(result, val_nan, nan_out); // NaN cases +
+ quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3bit + quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4)); + quotient = spu_sel(quotient0, quotient, quo_pos); + + *quo = (vec_llong2)quotient; + + return ((vec_double2)result); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/remquof4.c b/Extras/simdmathlibrary/spu/simdmath/remquof4.h similarity index 94% rename from Extras/simdmathlibrary/spu/remquof4.c rename to Extras/simdmathlibrary/spu/simdmath/remquof4.h index cd4eb7357..734df87da 100644 --- a/Extras/simdmathlibrary/spu/remquof4.c +++ b/Extras/simdmathlibrary/spu/simdmath/remquof4.h @@ -35,12 +35,14 @@ * greater than or equal to 3. */ +#ifndef ___SIMD_MATH_REMQUODF4_H___ +#define ___SIMD_MATH_REMQUODF4_H___ #include #include -vector float -remquof4(vector float x, vector float y, vector signed int *quo) +static inline vector float +_remquof4(vector float x, vector float y, vector signed int *quo) { vec_int4 n; vec_int4 quotient; @@ -48,7 +50,7 @@ remquof4(vector float x, vector float y, vector signed int *quo) vec_uint4 abs_x, abs_y, abs_2x, abs_8y; vec_uint4 exp_x, exp_y; vec_uint4 zero_x, zero_y; -// vec_uint4 logb_x, logb_y; + // vec_uint4 logb_x, logb_y; vec_uint4 mant_x, mant_y; vec_uint4 not_ge, overflow, quo_pos, mask; vec_uint4 result, result0, resultx, cnt, sign, bias; @@ -78,11 +80,11 @@ remquof4(vector float x, vector float y, vector signed int *quo) resultx = spu_or(spu_cmpgt(abs_8y, abs_x), spu_cmpgt(abs_y, spu_splats((unsigned int)0x7E7FFFFF))); zero_x = spu_cmpeq(exp_x, 0); -// zero_y = spu_cmpeq(exp_y, 0); + // zero_y = spu_cmpeq(exp_y, 0); zero_y = spu_cmpgt(implied_1, abs_y ); -// logb_x = spu_add(exp_x, -127); -// logb_y = spu_add(exp_y, -127); + // logb_x = spu_add(exp_x, -127); + // logb_y = spu_add(exp_y, -127); mant_x = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x); mant_y = spu_andc(spu_sel(implied_1, abs_8y, mant_mask), zero_y); @@ -93,7 +95,7 @@ remquof4(vector float x, vector float y, vector signed int *quo) */ result0 = spu_or(zero_x, zero_y); -// n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); + // n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); n = spu_sub((vec_int4)exp_x, (vec_int4)exp_y); // (exp_x-127)-(exp_y-127)=exp_x-exp_y mask = spu_cmpgt(n, 0); @@ -186,4 +188,4 @@ remquof4(vector float x, vector float y, vector signed int *quo) return ((vec_float4)result); } - +#endif diff --git a/Extras/simdmathlibrary/spu/rintd2.c b/Extras/simdmathlibrary/spu/simdmath/rintd2.h similarity index 95% rename from Extras/simdmathlibrary/spu/rintd2.c rename to Extras/simdmathlibrary/spu/simdmath/rintd2.h index ad733898c..286b41690 100644 --- a/Extras/simdmathlibrary/spu/rintd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/rintd2.h @@ -28,11 +28,14 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifndef ___SIMD_MATH_RINTD2_H___ +#define ___SIMD_MATH_RINTD2_H___ + #include #include -vector double -rintd2(vector double in) +static inline vector double +_rintd2(vector double in) { vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL}); vec_uint4 vec_norm = ((vec_uint4){0x00100000,0,0x00100000,0}); @@ -65,3 +68,5 @@ rintd2(vector double in) } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/rintf4.h b/Extras/simdmathlibrary/spu/simdmath/rintf4.h new file mode 100644 index 000000000..db8fdccd0 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/rintf4.h @@ -0,0 +1,54 @@ +/* rintf4 - for each of four float slots, round to the nearest integer, + consistent with the current rounding model. + On SPU, the rounding mode for float is always towards zero. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ___SIMD_MATH_RINTF4_H___ +#define ___SIMD_MATH_RINTF4_H___ + +#include +#include + +static inline vector float +_rintf4(vector float x) +{ + vector signed int xi; + vector unsigned int inrange; + + // Can convert to and from signed integer to truncate values in range [-2^31, 2^31). + // However, no truncation needed if exponent > 22. + + inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x ); + + xi = spu_convts( x, 0 ); + + return spu_sel( x, spu_convtf( xi, 0 ), inrange ); +} + +#endif diff --git a/Extras/simdmathlibrary/spu/roundd2.c b/Extras/simdmathlibrary/spu/simdmath/roundd2.h similarity index 93% rename from Extras/simdmathlibrary/spu/roundd2.c rename to Extras/simdmathlibrary/spu/simdmath/roundd2.h index 24f072b27..fdd9bd586 100644 --- a/Extras/simdmathlibrary/spu/roundd2.c +++ b/Extras/simdmathlibrary/spu/simdmath/roundd2.h @@ -28,12 +28,15 @@ POSSIBILITY OF SUCH DAMAGE. 
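`_rintf4` above (like `_nearbyintf4` earlier) leans on two facts: the constant 0x4b000000 is 2^23, and every float with magnitude at or above 2^23 is already an integer, while anything smaller fits in a 32-bit int, whose conversion truncates toward zero, the only float rounding mode the SPU has. Scalar equivalent (illustrative only):

    #include <math.h>

    static float rint_spu_sketch(float x)
    {
      if (fabsf(x) < 8388608.0f)    /* 2^23, i.e. 0x4b000000 */
        return (float)(int)x;       /* truncates toward zero */
      return x;                     /* no fractional bits left anyway */
    }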
*/ +#ifndef ___SIMD_MATH_ROUNDD2_H___ +#define ___SIMD_MATH_ROUNDD2_H___ + #include #include -vector double -roundd2 (vector double in) +static inline vector double +_roundd2 (vector double in) { vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}); vec_int4 exp, shift; @@ -62,7 +65,7 @@ roundd2 (vector double in) or_mask = spu_andc(spu_cmpgt(shift, 0), sign); and_mask = spu_rlmask(((vec_uint4){ 0xFFFFF, -1, 0xFFFFF, -1}), shift); -// mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -31)), or_mask); + // mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -31)), or_mask); mask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -32)), or_mask); /* Apply the mask and return the result. @@ -72,4 +75,4 @@ roundd2 (vector double in) return (out); } - +#endif diff --git a/Extras/simdmathlibrary/spu/roundf4.c b/Extras/simdmathlibrary/spu/simdmath/roundf4.h similarity index 95% rename from Extras/simdmathlibrary/spu/roundf4.c rename to Extras/simdmathlibrary/spu/simdmath/roundf4.h index 609c97897..b0f85fdb6 100644 --- a/Extras/simdmathlibrary/spu/roundf4.c +++ b/Extras/simdmathlibrary/spu/simdmath/roundf4.h @@ -28,10 +28,14 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef ___SIMD_MATH_ROUNDF4_H___ +#define ___SIMD_MATH_ROUNDF4_H___ + #include #include -vector float roundf4(vector float in) +static inline vector float +_roundf4(vector float in) { vec_int4 exp; vec_uint4 or_mask, and_mask, mask, addend; @@ -61,3 +65,5 @@ vector float roundf4(vector float in) return (out); } + +#endif diff --git a/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h b/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h new file mode 100644 index 000000000..6c4dd3130 --- /dev/null +++ b/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h @@ -0,0 +1,105 @@ +/* rsqrtd2 - for each of two double slots, compute reciprocal square root. + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
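round() differs from the rint/nearbyint family above in that halfway cases go away from zero regardless of rounding mode; the mask-and-addend construction in the roundd2/roundf4 hunks implements that at the bit level. A naive scalar statement of the semantics (illustrative; note the naive form can double-round values just under a .5 boundary, which the bit-mask approach avoids):

    #include <math.h>

    static double round_half_away_sketch(double x)
    {
      return (x >= 0.0) ? floor(x + 0.5) : ceil(x - 0.5);
    }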
diff --git a/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h b/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h
new file mode 100644
index 000000000..6c4dd3130
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/rsqrtd2.h
@@ -0,0 +1,105 @@
+/* rsqrtd2 - for each of two double slots, compute reciprocal square root.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_RSQRTD2_H___
+#define ___SIMD_MATH_RSQRTD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/isinfd2.h>
+#include <simdmath/signbitd2.h>
+#include <simdmath/is0denormd2.h>
+
+//
+// Handles exceptional values as follows:
+// NaN -> NaN
+// (+,-)0 -> (+,-)Inf
+// +Inf -> +0
+// -Inf -> NaN
+// -Finite -> NaN
+// Denormal inputs are treated as zero.
+
+static inline vector double
+_rsqrtd2 (vector double x)
+{
+  vec_ullong2 expmask, onemask, signmask, evenexp;
+  vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result;
+  vec_float4 halff, onef, manf, y0f, y1f;
+
+  expmask = spu_splats(0x7ff0000000000000ull);
+  onemask = spu_splats(0x0010000000000000ull);
+  signmask = spu_splats(0x8000000000000000ull);
+  onef = spu_splats(1.0f);
+  one = spu_extend( onef );
+  halff = spu_splats(0.5f);
+  half = spu_extend( halff );
+
+  // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
+  // where i = 0 when exponent is even and i = 1 when exponent is odd.
+  //
+  // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
+  //
+  // biased_exp = 1023 + exponent
+  // new_biased_exp = 1023 - (exponent+i)/2
+  //                = 1023 - (biased_exp-1023+i)/2
+  //                = (3069 - (biased_exp+i)) / 2
+
+  evenexp = spu_and( (vec_ullong2)x, onemask );
+  man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );
+
+  exp = spu_and( x, (vec_double2)expmask );
+  nexp = spu_or( exp, (vec_double2)onemask );
+  nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );
+
+  // Compute mantissa part in single precision.
+  // Convert back to double and multiply with 2^(-(exponent+i)/2), then
+  // do two Newton-Raphson steps for full precision.
+
+  manf = spu_roundtf( man );
+  y0f = spu_rsqrte( manf );
+  y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f );
+  y1 = spu_mul( spu_extend( y1f ), nexp );
+  y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 );
+  y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 );
+
+  // Choose iterated result or special value.
+
+  zero = spu_and( x, (vec_double2)signmask );
+  inf = spu_sel( (vec_double2)expmask, x, signmask );
+  nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
+
+  result = spu_sel( y3, zero, _isinfd2 ( x ) );
+  result = spu_sel( result, nan, _signbitd2 ( x ) );
+  result = spu_sel( result, inf, _is0denormd2 ( x ) );
+
+  return result;
+}
+
+#endif
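[The factoring described in the comments above is the heart of _rsqrtd2: split x into a mantissa with a small even exponent and a pure power of two whose exponent can be halved exactly. A scalar sketch under the assumption of positive, normal input, with libm's sqrtf standing in for the spu_rsqrte estimate:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

/* rsqrt(x) = rsqrt(man) * 2^(-(e-i)/2), where man = x * 2^(-(e-i)) and
   i makes (e-i) even. Each Newton-Raphson step
   y' = y + (y/2) * (1 - y*y*man) roughly doubles the correct bits. */
static double rsqrt_nr(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int e = (int)((bits >> 52) & 0x7ff) - 1023;  /* unbiased exponent */
    int i = e & 1;                               /* force even exponent */
    double man   = ldexp(x, -(e - i));           /* mantissa in [1,4) */
    double scale = ldexp(1.0, -(e - i) / 2);     /* exact power of two */

    double y = 1.0 / sqrtf((float)man);          /* single-precision seed */
    y = y + 0.5 * y * (1.0 - y * y * man);       /* NR step 1 */
    y = y + 0.5 * y * (1.0 - y * y * man);       /* NR step 2 */
    return y * scale;
}

int main(void)
{
    printf("%.17g vs %.17g\n", rsqrt_nr(2.0), 1.0 / sqrt(2.0));
    return 0;
}
]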
diff --git a/Extras/simdmathlibrary/spu/rsqrtf4.c b/Extras/simdmathlibrary/spu/simdmath/rsqrtf4.h
similarity index 79%
rename from Extras/simdmathlibrary/spu/rsqrtf4.c
rename to Extras/simdmathlibrary/spu/simdmath/rsqrtf4.h
index e4d6dc45e..605a479db 100644
--- a/Extras/simdmathlibrary/spu/rsqrtf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/rsqrtf4.h
@@ -28,19 +28,24 @@
 */
 
 // Undefined if input < 0.
 
+#ifndef ___SIMD_MATH_RSQRTF4_H___
+#define ___SIMD_MATH_RSQRTF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float rsqrtf4 (vector float x)
+static inline vector float
+_rsqrtf4 (vector float x)
 {
-  // Reciprocal square root estimate and 1 Newton-Raphson iteration.
+  // Reciprocal square root estimate and 1 Newton-Raphson iteration.
-  vec_float4 y0, y0x, y0half;
-  vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
+  vec_float4 y0, y0x, y0half;
+  vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
 
-  y0 = spu_rsqrte( x );
-  y0x = spu_mul( y0, x );
-  y0half = spu_mul( y0, spu_splats(0.5f) );
-  return spu_madd( spu_nmsub( y0, y0x, oneish ), y0half, y0 );
+  y0 = spu_rsqrte( x );
+  y0x = spu_mul( y0, x );
+  y0half = spu_mul( y0, spu_splats(0.5f) );
+  return spu_madd( spu_nmsub( y0, y0x, oneish ), y0half, y0 );
 }
+
+#endif
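[A scalar version of that single refinement step, for reference. The "oneish" constant 0x3f800001 is 1.0f plus one ulp; using it instead of exact 1.0f appears to bias the correction slightly upward to offset the truncating SPU float arithmetic, so treat that reading as an interpretation rather than documented fact. Helper name is invented:

#include <stdio.h>
#include <math.h>

/* y1 = y0 + (oneish - y0*(y0*x)) * (y0/2); with oneish == 1.0 this is
   the textbook Newton-Raphson step for 1/sqrt(x). */
static float rsqrt_refine(float x, float y0)
{
    const union { unsigned u; float f; } oneish = { 0x3f800001u };
    float y0x    = y0 * x;
    float y0half = 0.5f * y0;
    return (oneish.f - y0 * y0x) * y0half + y0;
}

int main(void)
{
    float x = 2.0f;
    float y0 = 1.0f / sqrtf(x) * 1.01f;   /* deliberately rough seed */
    printf("%.9g -> %.9g (exact %.9g)\n", y0, rsqrt_refine(x, y0), 1.0f / sqrtf(x));
    return 0;
}
]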
diff --git a/Extras/simdmathlibrary/spu/scalbllnd2.c b/Extras/simdmathlibrary/spu/simdmath/scalbllnd2.h
similarity index 94%
rename from Extras/simdmathlibrary/spu/scalbllnd2.c
rename to Extras/simdmathlibrary/spu/simdmath/scalbllnd2.h
index cb4e20961..b51063e0e 100644
--- a/Extras/simdmathlibrary/spu/scalbllnd2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/scalbllnd2.h
@@ -29,19 +29,22 @@
 POSSIBILITY OF SUCH DAMAGE.
 */
 
-#undef SCALBLLND2_ROUND
+#ifndef ___SIMD_MATH_SCALBLLND2_H___
+#define ___SIMD_MATH_SCALBLLND2_H___
+
+#undef _SCALBLLND2_ROUND
 
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector double
-scalbllnd2(vector double x, vector signed long long ex)
+static inline vector double
+_scalbllnd2(vector double x, vector signed long long ex)
 {
   vec_int4 e1, e2;
   vec_int4 min = spu_splats(-2099);
-// vec_int4 min = spu_splats(-2044);
+  // vec_int4 min = spu_splats(-2044);
   vec_int4 max = spu_splats( 2098);
-// vec_int4 max = spu_splats( 2046);
+  // vec_int4 max = spu_splats( 2046);
   vec_uint4 cmp_min, cmp_max;
   vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
   vec_double2 f1, f2;
@@ -85,7 +88,7 @@ scalbllnd2(vector double x, vector signed long long ex)
 
 /* Compute the product x * 2^e1 * 2^e2
  */
-// out = spu_mul(spu_mul(x, f1), f2);
+  // out = spu_mul(spu_mul(x, f1), f2);
 
   // check floating point register DENORM bit
   vec_uint4 fpscr0, fpscr;
@@ -161,7 +164,7 @@ scalbllnd2(vector double x, vector signed long long ex)
   maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
   maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
-// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
+  // maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
 
   minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
   minmask = spu_shuffle(minmask, minmask, splat_msb);
@@ -180,7 +183,7 @@ scalbllnd2(vector double x, vector signed long long ex)
   vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) );
   vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) );
 
-#ifdef SCALBLLND2_ROUND
+#ifdef _SCALBLLND2_ROUND
   // check current round mode
   fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
   fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0}));
@@ -242,14 +245,14 @@ scalbllnd2(vector double x, vector signed long long ex)
   mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4));
   mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4));
 
-#endif // SCALBLLND2_ROUND
+#endif // _SCALBLLND2_ROUND
 
   vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
 
   // select right answer
-  x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
-  x = spu_sel(x, (vec_double2)zeros, minmask);
-  x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
+  x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask);
+  x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask);
+  x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask);
 
   out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
 
@@ -262,4 +265,4 @@ scalbllnd2(vector double x, vector signed long long ex)
 
   return out;
 }
-
+#endif
diff --git a/Extras/simdmathlibrary/spu/scalbnf4.c b/Extras/simdmathlibrary/spu/simdmath/scalbnf4.h
similarity index 93%
rename from Extras/simdmathlibrary/spu/scalbnf4.c
rename to Extras/simdmathlibrary/spu/simdmath/scalbnf4.h
index acc62adc5..f13826675 100644
--- a/Extras/simdmathlibrary/spu/scalbnf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/scalbnf4.h
@@ -29,11 +29,14 @@
 POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef ___SIMD_MATH_SCALBNF4_H___
+#define ___SIMD_MATH_SCALBNF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-scalbnf4(vector float x, vector signed int n)
+static inline vector float
+_scalbnf4(vector float x, vector signed int n)
 {
   vec_int4 x_exp;
   vec_uint4 zero, overflow;
@@ -54,7 +57,7 @@ scalbnf4(vector float x, vector signed int n)
 
   zero = spu_orc(zero, spu_cmpgt(x_exp, 0));
 
-// overflow = spu_rlmask(spu_cmpgt(x_exp, 255), -1);
+  // overflow = spu_rlmask(spu_cmpgt(x_exp, 255), -1);
   overflow = spu_cmpgt(x_exp, 255);
 
 /* Merge the expect exponent with x's mantissa. Zero the
@@ -69,4 +72,4 @@ scalbnf4(vector float x, vector signed int n)
 
   return out;
 }
-
+#endif
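[The scalbn kernels scale by a power of two without multiplying: they add n straight into the biased exponent field and patch the out-of-range cases with masks. A scalar sketch of the idea; the exact saturation value the SPU kernel selects differs (SPU floats have no infinities), so the clamping below is an assumption for illustration:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* x * 2^n by exponent-field arithmetic: flush to zero on underflow,
   clamp the exponent on overflow. */
static float scalbn_bits(float x, int n)
{
    uint32_t bits;
    memcpy(&bits, &x, sizeof bits);
    int e = (int)((bits >> 23) & 0xff);
    if (e == 0) return x;                 /* zero/denorm: leave as-is */
    e += n;
    if (e <= 0)  return 0.0f;             /* underflow -> zero */
    if (e > 255) e = 255;                 /* overflow -> saturate */
    bits = (bits & 0x807fffffu) | ((uint32_t)e << 23);
    memcpy(&x, &bits, sizeof x);
    return x;
}

int main(void)
{
    printf("%g %g\n", scalbn_bits(3.0f, 4), scalbn_bits(1.0f, -130));
    return 0;
}
]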
diff --git a/Extras/simdmathlibrary/spu/simdmath/signbitd2.h b/Extras/simdmathlibrary/spu/simdmath/signbitd2.h
new file mode 100644
index 000000000..7cdbb84c3
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/signbitd2.h
@@ -0,0 +1,48 @@
+/* signbitd2 - for each of two double slots, if input has negative sign bit return mask of ones, else 0
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SIGNBITD2_H___
+#define ___SIMD_MATH_SIGNBITD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+static inline vector unsigned long long
+_signbitd2 (vector double x)
+{
+  vec_ullong2 cmp;
+  vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
+
+  cmp = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)x );
+  cmp = spu_shuffle( cmp, cmp, even );
+
+  return cmp;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/signbitf4.h b/Extras/simdmathlibrary/spu/simdmath/signbitf4.h
new file mode 100644
index 000000000..7fbda7a02
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/signbitf4.h
@@ -0,0 +1,42 @@
+/* signbitf4 - for each element of vector x, return a mask of ones if x' has signbit one, zero otherwise
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SIGNBITF4_H___
+#define ___SIMD_MATH_SIGNBITF4_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+static inline vector unsigned int
+_signbitf4 (vector float x)
+{
+  return spu_cmpgt( spu_splats(0), (vec_int4)x );
+}
+
+#endif
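[The signbit kernels rely on one observation: reinterpreted as a signed integer, a float is negative exactly when its sign bit is set, so a single integer compare against zero produces the mask (the double version then splats the high-word result across both words of each slot via the "even" shuffle). Scalar sketch, helper name invented:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* All-ones mask for negative inputs, including -0.0 and negative NaNs. */
static uint32_t signbit_mask(float x)
{
    int32_t bits;
    memcpy(&bits, &x, sizeof bits);
    return (0 > bits) ? 0xffffffffu : 0u;
}

int main(void)
{
    printf("%08x %08x %08x\n", signbit_mask(-1.5f), signbit_mask(0.0f), signbit_mask(-0.0f));
    return 0;
}
]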
diff --git a/Extras/simdmathlibrary/spu/simdmath/sincosd2.h b/Extras/simdmathlibrary/spu/simdmath/sincosd2.h
new file mode 100644
index 000000000..f8482cde3
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/sincosd2.h
@@ -0,0 +1,149 @@
+/* sind2 and cosd2
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH__SINCOSD2_H___
+#define ___SIMD_MATH__SINCOSD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/_sincos.h>
+#include <simdmath/isgreaterd2.h>
+#include <simdmath/isnand2.h>
+#include <simdmath/is0denormd2.h>
+
+//
+// Computes both the sine and cosine of each of two double slots.
+//
+static inline void
+_sincosd2(vector double x, vec_double2 *s, vec_double2 *c)
+{
+  vec_double2 xl,xl2,xl3;
+  vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
+  vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
+  vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
+  vec_double2 ts, tc;
+
+  // Range reduction using : x = angle * TwoOverPi;
+  //
+  xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
+
+  // Find the quadrant the angle falls in
+  // using: q = (int) (ceil(abs(x))*sign(x))
+  //
+  xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
+  vec_float4 xf = spu_roundtf(xl);
+  vec_int4 q = spu_convts(xf,0);
+  q = spu_shuffle(q,q,copyEven);
+
+
+  // Compute an offset based on the quadrant that the angle falls in
+  //
+  vec_int4 offsetSin = spu_and(q,spu_splats(0x3));
+  vec_int4 offsetCos = spu_add(spu_splats(1),offsetSin);
+
+  // Remainder in range [-pi/4..pi/4]
+  //
+  vec_float4 qf = spu_convtf(q,0);
+  vec_double2 qd = spu_extend(qf);
+  vec_double2 p1 = spu_nmsub(qd,spu_splats(__SINCOSD_KC1),x);
+  xl = spu_nmsub(qd,spu_splats(__SINCOSD_KC2),p1);
+
+  // Check if |xl| is a really small number
+  //
+  vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
+  vec_ullong2 isTiny = (vec_ullong2)_isgreaterd2(tiny,absXl);
+
+  // Compute x^2 and x^3
+  //
+  xl2 = spu_mul(xl,xl);
+  xl3 = spu_mul(xl2,xl);
+
+  // Compute both the sin and cos of the angles
+  // using a polynomial expression:
+  //   cx = 1.0f + xl2 * (((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
+  //   sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
+  //
+
+  vec_double2 ct0 = spu_mul(xl2,xl2);
+  vec_double2 ct1 = spu_madd(spu_splats(__SINCOSD_CC0),xl2,spu_splats(__SINCOSD_CC1));
+  vec_double2 ct2 = spu_madd(spu_splats(__SINCOSD_CC2),xl2,spu_splats(__SINCOSD_CC3));
+  vec_double2 ct3 = spu_madd(spu_splats(__SINCOSD_CC4),xl2,spu_splats(__SINCOSD_CC5));
+  vec_double2 st1 = spu_madd(spu_splats(__SINCOSD_SC0),xl2,spu_splats(__SINCOSD_SC1));
+  vec_double2 st2 = spu_madd(spu_splats(__SINCOSD_SC2),xl2,spu_splats(__SINCOSD_SC3));
+  vec_double2 st3 = spu_madd(spu_splats(__SINCOSD_SC4),xl2,spu_splats(__SINCOSD_SC5));
+  vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
+  vec_double2 st4 = spu_madd(st2,ct0,st3);
+  vec_double2 ct5 = spu_mul(ct0,ct0);
+
+  vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
+  vec_double2 st6 = spu_madd(ct5,st1,st4);
+
+  vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
+  vec_double2 sx = spu_madd(st6,xl3,xl);
+
+  // Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
+  //
+  sx = spu_sel(sx,xl,isTiny);
+  cx = spu_sel(cx,spu_splats(1.0),isTiny);
+
+  // Use the cosine when the offset is odd and the sin
+  // when the offset is even
+  //
+  vec_ullong2 sinMask = (vec_ullong2)spu_cmpeq(spu_and(offsetSin,(int)0x1),spu_splats((int)0));
+  vec_ullong2 cosMask = (vec_ullong2)spu_cmpeq(spu_and(offsetCos,(int)0x1),spu_splats((int)0));
+  ts = spu_sel(cx,sx,sinMask);
+  tc = spu_sel(cx,sx,cosMask);
+
+  // Flip the sign of the result when (offset mod 4) = 1 or 2
+  //
+  sinMask = (vec_ullong2)spu_cmpeq(spu_and(offsetSin,(int)0x2),spu_splats((int)0));
+  sinMask = spu_shuffle(sinMask,sinMask,copyEven);
+  ts = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)ts),ts,sinMask);
+
+  cosMask = (vec_ullong2)spu_cmpeq(spu_and(offsetCos,(int)0x2),spu_splats((int)0));
+  cosMask = spu_shuffle(cosMask,cosMask,copyEven);
+  tc = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)tc),tc,cosMask);
+
+  // if input = +/-Inf return NAN
+  //
+  ts = spu_sel(ts, nan, _isnand2 (x));
+  tc = spu_sel(tc, nan, _isnand2 (x));
+
+  // if input = 0 or denorm return 'result0'
+  //
+  vec_ullong2 zeroMask = _is0denormd2 (x);
+  ts = spu_sel(ts,x,zeroMask);
+  tc = spu_sel(tc,spu_splats(1.0),zeroMask);
+
+  *s = ts;
+  *c = tc;
+}
+
+#endif
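[The two-constant range reduction used above (and in the tan kernels) is a Cody-Waite style split of pi/2: KC1 carries only the high mantissa bits so q*KC1 subtracts exactly, and KC2 folds in the remainder. Scalar sketch using the split constants that appear verbatim in the old tand2.c below; the SPU's sign-aware rounding of q differs slightly from the plain round-to-nearest here:

#include <stdio.h>
#include <math.h>

#define KC1 (13176794.0 / 8388608.0)                /* high bits of pi/2 */
#define KC2 7.5497899548918821691639751442098584e-8 /* pi/2 - KC1 */

/* Reduce x to r in [-pi/4, pi/4] with quadrant q, x ~= q*(pi/2) + r. */
static double reduce(double x, int *quadrant)
{
    double q = floor(x * 0.63661977236758134307553505349005744 + 0.5);
    *quadrant = (int)q & 3;
    double r = x - q * KC1;   /* exact: KC1 has few mantissa bits */
    r -= q * KC2;             /* fold in the rest of pi/2 */
    return r;
}

int main(void)
{
    int quad;
    double r = reduce(10.0, &quad);
    printf("r=%.17g quad=%d (check: sin(10)=%.17g)\n", r, quad, sin(10.0));
    return 0;
}
]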
diff --git a/Extras/simdmathlibrary/spu/simdmath/sincosf4.h b/Extras/simdmathlibrary/spu/simdmath/sincosf4.h
new file mode 100644
index 000000000..db9cf8df9
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/sincosf4.h
@@ -0,0 +1,113 @@
+/* sincosf4
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SINCOSF4_H___
+#define ___SIMD_MATH_SINCOSF4_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/_sincos.h>
+
+//
+// Computes both the sine and cosine of all four slots of x
+// by using a polynomial approximation.
+//
+static inline void
+_sincosf4 (vector float x, vector float *s, vector float *c)
+{
+  vec_float4 xl,xl2,xl3;
+  vec_int4 q;
+  vec_int4 offsetSin, offsetCos;
+  vec_float4 ts, tc;
+
+  // Range reduction using : xl = angle * TwoOverPi;
+  //
+  xl = spu_mul(x, spu_splats(0.63661977236f));
+
+  // Find the quadrant the angle falls in
+  // using: q = (int) (ceil(abs(xl))*sign(xl))
+  //
+  xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
+  q = spu_convts(xl,0);
+
+
+  // Compute the offset based on the quadrant that the angle falls in.
+  // Add 1 to the offset for the cosine.
+  //
+  offsetSin = spu_and(q,spu_splats((int)0x3));
+  offsetCos = spu_add(spu_splats(1),offsetSin);
+
+  // Remainder in range [-pi/4..pi/4]
+  //
+  vec_float4 qf = spu_convtf(q,0);
+  vec_float4 p1 = spu_nmsub(qf,spu_splats(__SINCOSF_KC1),x);
+  xl = spu_nmsub(qf,spu_splats(__SINCOSF_KC2),p1);
+
+  // Compute x^2 and x^3
+  //
+  xl2 = spu_mul(xl,xl);
+  xl3 = spu_mul(xl2,xl);
+
+
+  // Compute both the sin and cos of the angles
+  // using a polynomial expression:
+  //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+  //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+  //
+  vec_float4 ct1 = spu_madd(spu_splats(__SINCOSF_CC0),xl2,spu_splats(__SINCOSF_CC1));
+  vec_float4 st1 = spu_madd(spu_splats(__SINCOSF_SC0),xl2,spu_splats(__SINCOSF_SC1));
+
+  vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(__SINCOSF_CC2));
+  vec_float4 st2 = spu_madd(st1,xl2,spu_splats(__SINCOSF_SC2));
+
+  vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
+  vec_float4 sx = spu_madd(st2,xl3,xl);
+
+  // Use the cosine when the offset is odd and the sin
+  // when the offset is even
+  //
+  vec_uint4 sinMask = spu_cmpeq(spu_and(offsetSin,(int)0x1),spu_splats((int)0));
+  vec_uint4 cosMask = spu_cmpeq(spu_and(offsetCos,(int)0x1),spu_splats((int)0));
+  ts = spu_sel(cx,sx,sinMask);
+  tc = spu_sel(cx,sx,cosMask);
+
+  // Flip the sign of the result when (offset mod 4) = 1 or 2
+  //
+  sinMask = spu_cmpeq(spu_and(offsetSin,(int)0x2),spu_splats((int)0));
+  cosMask = spu_cmpeq(spu_and(offsetCos,(int)0x2),spu_splats((int)0));
+
+  ts = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)ts),ts,sinMask);
+  tc = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)tc),tc,cosMask);
+
+  *s = ts;
+  *c = tc;
+}
+
+#endif
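[The quadrant bookkeeping shared by both sincos kernels: after reduction, sin(x) is one of +/-sin(r) or +/-cos(r) depending on q mod 4; bit 0 of the offset picks sin vs cos and bit 1 picks the sign, and cosine reuses the same table shifted by one. The SPU code does this branch-free with masks; a scalar sketch with branches, using libm as a stand-in for the polynomials:

#include <stdio.h>
#include <math.h>

static void sincos_from_quadrant(double r, int q, double *s, double *c)
{
    double sr = sin(r), cr = cos(r);   /* polynomial stand-ins */
    int offs = q & 3;                  /* offset for sine */
    int offc = (q + 1) & 3;            /* offset for cosine */

    *s = (offs & 1) ? cr : sr;         /* odd offset -> use cos(r) */
    if (offs & 2) *s = -*s;            /* offsets 2,3 -> negate */

    *c = (offc & 1) ? cr : sr;
    if (offc & 2) *c = -*c;
}

int main(void)
{
    const double PI_2 = 1.57079632679489661923;
    double s, c;
    sincos_from_quadrant(10.0 - 6 * PI_2, 6, &s, &c);   /* x = 10, q = 6 */
    printf("%f %f (libm: %f %f)\n", s, c, sin(10.0), cos(10.0));
    return 0;
}
]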
diff --git a/Extras/simdmathlibrary/spu/simdmath/sind2.h b/Extras/simdmathlibrary/spu/simdmath/sind2.h
new file mode 100644
index 000000000..a2f4a17b2
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/sind2.h
@@ -0,0 +1,49 @@
+/* sind2
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SIND2_H___
+#define ___SIMD_MATH_SIND2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/sincosd2.h>
+
+//
+// Computes the sine of each of two double slots.
+//
+static inline vector double
+_sind2 (vector double x)
+{
+  vec_double2 s, c;
+  _sincosd2(x, &s, &c);
+  return s;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/sinf4.h b/Extras/simdmathlibrary/spu/simdmath/sinf4.h
new file mode 100644
index 000000000..b2f1aac0b
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/sinf4.h
@@ -0,0 +1,49 @@
+/* sinf4
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SINF4_H___
+#define ___SIMD_MATH_SINF4_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/sincosf4.h>
+
+//
+// Computes the sine of each of the four slots by using a polynomial approximation
+//
+static inline vector float
+_sinf4 (vector float x)
+{
+  vec_float4 s, c;
+  _sincosf4(x, &s, &c);
+  return s;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/simdmath/sqrtd2.h b/Extras/simdmathlibrary/spu/simdmath/sqrtd2.h
new file mode 100644
index 000000000..60a17d23d
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/sqrtd2.h
@@ -0,0 +1,113 @@
+/* sqrtd2 - for each of two double slots, compute square root.
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_SQRTD2_H___
+#define ___SIMD_MATH_SQRTD2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/isinfd2.h>
+#include <simdmath/isnand2.h>
+#include <simdmath/is0denormd2.h>
+
+//
+// Handles exceptional values as follows:
+// NaN -> NaN
+// -Inf -> NaN
+// -Finite -> NaN
+// Denormal inputs are treated as zero.
+
+static inline vector double
+_sqrtd2 (vector double x)
+{
+  vec_ullong2 expmask, onemask, signmask, evenexp;
+  vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, neg, result;
+  vec_float4 halff, onef, manf, y0f, y1f;
+
+  expmask = spu_splats(0x7ff0000000000000ull);
+  onemask = spu_splats(0x0010000000000000ull);
+  signmask = spu_splats(0x8000000000000000ull);
+  onef = spu_splats(1.0f);
+  one = spu_extend( onef );
+  halff = spu_splats(0.5f);
+  half = spu_extend( halff );
+
+  // First compute reciprocal square root.
+  // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
+  // where i = 0 when exponent is even and i = 1 when exponent is odd.
+  //
+  // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
+  //
+  // biased_exp = 1023 + exponent
+  // new_biased_exp = 1023 - (exponent+i)/2
+  //                = 1023 - (biased_exp-1023+i)/2
+  //                = (3069 - (biased_exp+i)) / 2
+
+  evenexp = spu_and( (vec_ullong2)x, onemask );
+  man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );
+
+  exp = spu_and( x, (vec_double2)expmask );
+  nexp = spu_or( exp, (vec_double2)onemask );
+  nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );
+
+  // Compute mantissa part in single precision.
+  // Convert back to double and multiply with 2^(-(exponent+i)/2), then
+  // do two Newton-Raphson steps for full precision.
+
+  manf = spu_roundtf( man );
+  y0f = spu_rsqrte( manf );
+  y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f );
+  y1 = spu_mul( spu_extend( y1f ), nexp );
+  y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 );
+  y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 );
+
+  // Multiply by input to get square root.
+
+  y3 = spu_mul( y3, x );
+
+  // Choose iterated result or special value.
+
+  zero = spu_and( x, (vec_double2)signmask );
+  inf = (vec_double2)expmask;
+  nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
+
+  neg = spu_and(x, (vec_double2)spu_splats(0x8000000000000000ull));
+  neg = spu_shuffle(neg, neg, ((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
+  neg = (vec_double2)spu_rlmaska((vec_int4)neg, -31);
+
+  result = spu_sel( y3, inf, _isinfd2 ( x ) );
+  result = spu_sel( result, nan, _isnand2 ( x ) );
+  result = spu_sel( result, nan, (vec_ullong2)neg );
+  result = spu_sel( result, zero, _is0denormd2 ( x ) );
+
+  return result;
+}
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/sqrtf4.c b/Extras/simdmathlibrary/spu/simdmath/sqrtf4.h
similarity index 79%
rename from Extras/simdmathlibrary/spu/sqrtf4.c
rename to Extras/simdmathlibrary/spu/simdmath/sqrtf4.h
index 8e8b94b45..ae3f8f629 100644
--- a/Extras/simdmathlibrary/spu/sqrtf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/sqrtf4.h
@@ -29,19 +29,24 @@
 
 // Undefined if input < 0.
 
+#ifndef ___SIMD_MATH_SQRTF4_H___
+#define ___SIMD_MATH_SQRTF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float sqrtf4 (vector float x)
+static inline vector float
+_sqrtf4 (vector float x)
 {
-  // Reciprocal square root estimate and 1 Newton-Raphson iteration.
+  // Reciprocal square root estimate and 1 Newton-Raphson iteration.
-  vec_float4 y0, y0x, y0xhalf;
-  vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
+  vec_float4 y0, y0x, y0xhalf;
+  vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
 
-  y0 = spu_rsqrte( x );
-  y0x = spu_mul( y0, x );
-  y0xhalf = spu_mul( y0x, spu_splats(0.5f) );
-  return spu_madd( spu_nmsub( y0, y0x, oneish ), y0xhalf, y0x );
+  y0 = spu_rsqrte( x );
+  y0x = spu_mul( y0, x );
+  y0xhalf = spu_mul( y0x, spu_splats(0.5f) );
+  return spu_madd( spu_nmsub( y0, y0x, oneish ), y0xhalf, y0x );
 }
+
+#endif
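[Both sqrt kernels reuse the rsqrt machinery via sqrt(x) = x * rsqrt(x); _sqrtf4 folds the multiply into the refinement by iterating on y0x = y0*x directly. Scalar sketch of that fused step, helper name invented:

#include <stdio.h>
#include <math.h>

/* One Newton-Raphson step on the sqrt approximation g = y0*x:
   g' = g + (1 - y0*g) * (g/2), quadratically shrinking the error. */
static float sqrt_via_rsqrt(float x, float y0 /* ~ 1/sqrt(x) estimate */)
{
    float y0x     = y0 * x;        /* first sqrt approximation */
    float y0xhalf = 0.5f * y0x;
    return (1.0f - y0 * y0x) * y0xhalf + y0x;
}

int main(void)
{
    float x = 10.0f;
    printf("%.9g vs %.9g\n", sqrt_via_rsqrt(x, 0.316f), sqrtf(x));
    return 0;
}
]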
diff --git a/Extras/simdmathlibrary/spu/simdmath/tand2.h b/Extras/simdmathlibrary/spu/simdmath/tand2.h
new file mode 100644
index 000000000..c96907a21
--- /dev/null
+++ b/Extras/simdmathlibrary/spu/simdmath/tand2.h
@@ -0,0 +1,125 @@
+/* tand2
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ___SIMD_MATH_TAND2_H___
+#define ___SIMD_MATH_TAND2_H___
+
+#include <simdmath.h>
+#include <spu_intrinsics.h>
+
+#include <simdmath/_sincos.h>
+#include <simdmath/divd2.h>
+#include <simdmath/isinfd2.h>
+#include <simdmath/is0denormd2.h>
+
+#define __TAND_CC0 -0.00020844956382258822
+#define __TAND_CC1 0.02334489464693293380
+#define __TAND_CC2 -0.46161689768996201755
+#define __TAND_SC0 -0.00000748373924372997
+#define __TAND_SC1 0.00280592875035233052
+#define __TAND_SC2 -0.12828356435663158978
+
+/*
+ * Computes the tangent of the given angles by first reducing the
+ * range to [-pi/4..pi/4] and performing the appropriate sin/cos ratio
+ */
+static inline vector double
+_tand2 (vector double x)
+{
+  vec_double2 xl,x2,x3,res;
+  vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
+  vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
+
+  // Range reduction using : xl = angle * TwoOverPi;
+  //
+  xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
+
+
+  // Find the quadrant the angle falls in
+  // using: q = (int) (ceil(abs(xl))*sign(xl))
+  //
+  xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
+  vec_float4 xf = spu_roundtf(xl);
+  vec_int4 q = spu_convts(xf,0);
+  q = spu_shuffle(q,q,copyEven);
+
+
+  // Remainder in range [-pi/4..pi/4]
+  //
+  vec_float4 qf = spu_convtf(q,0);
+  vec_double2 qd = spu_extend(qf);
+  vec_double2 p1 = spu_nmsub(qd,spu_splats(__SINCOSD_KC1),x);
+  xl = spu_nmsub(qd,spu_splats(__SINCOSD_KC2),p1);
+
+  // Compute x^2 and x^3
+  //
+  x2 = spu_mul(xl,xl);
+  x3 = spu_mul(x2,xl);
+
+
+  // Compute both the sin and cos of the angles
+  // using a polynomial expression:
+  //   cx = 1.0f + x2 * ((C0 * x2 + C1) * x2 + C2), and
+  //   sx = x + x3 * ((S0 * x2 + S1) * x2 + S2)
+  //
+  vec_double2 ct1 = spu_madd(spu_splats(__TAND_CC0),x2,spu_splats(__TAND_CC1));
+  vec_double2 st1 = spu_madd(spu_splats(__TAND_SC0),x2,spu_splats(__TAND_SC1));
+
+  vec_double2 ct2 = spu_madd(ct1,x2,spu_splats(__TAND_CC2));
+  vec_double2 st2 = spu_madd(st1,x2,spu_splats(__TAND_SC2));
+
+  vec_double2 cx = spu_madd(ct2,x2,spu_splats(1.0));
+  vec_double2 sx = spu_madd(st2,x3,xl);
+
+
+  // Compute both cx/sx and sx/cx
+  //
+  vec_double2 cxosx = _divd2(cx,sx);
+  vec_double2 sxocx = _divd2(sx,cx);
+
+  vec_double2 ncxosx = (vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)cxosx);
+
+  // For odd numbered quadrants return -cx/sx , otherwise return
+  // sx/cx
+  //
+  vec_ullong2 mask = (vec_ullong2)spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0));
+  res = spu_sel(ncxosx,sxocx,mask);
+
+  // If input = +/-Inf return NAN
+  //
+  res = spu_sel(res,nan,_isinfd2 (x));
+
+  // If input =0 or denorm return input
+  //
+  res = spu_sel(res,x, _is0denormd2 (x));
+
+  return res;
+}
+
+#endif
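[The tan kernels avoid large-argument polynomials entirely: with the remainder r in [-pi/4, pi/4], tan(x) equals sin(r)/cos(r) in even quadrants and -cos(r)/sin(r) in odd ones, so both ratios are computed and one is selected. Scalar sketch with libm standing in for the short polynomials:

#include <stdio.h>
#include <math.h>

static double tan_from_quadrant(double r, int q)
{
    double sr = sin(r), cr = cos(r);   /* polynomial stand-ins */
    return (q & 1) ? -(cr / sr) : (sr / cr);
}

int main(void)
{
    const double PI_2 = 1.57079632679489661923;
    double x = 2.0;                          /* second quadrant: q = 1 */
    int q = (int)floor(x / PI_2 + 0.5);
    printf("%.12g vs %.12g\n", tan_from_quadrant(x - q * PI_2, q), tan(2.0));
    return 0;
}
]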
diff --git a/Extras/simdmathlibrary/spu/tanf4.c b/Extras/simdmathlibrary/spu/simdmath/tanf4.h
similarity index 53%
rename from Extras/simdmathlibrary/spu/tanf4.c
rename to Extras/simdmathlibrary/spu/simdmath/tanf4.h
index 10c53c4fe..889466989 100644
--- a/Extras/simdmathlibrary/spu/tanf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/tanf4.h
@@ -27,65 +27,69 @@
 POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef ___SIMD_MATH_TANF4_H___
+#define ___SIMD_MATH_TANF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
+#include <simdmath/_sincos.h>
+#include <simdmath/divf4.h>
 
-#define _TAN_KC1 1.57079625129f
-#define _TAN_KC2 7.54978995489e-8f
-
-vector float
-tanf4 (vector float x)
+static inline vector float
+_tanf4 (vector float x)
 {
-  vector float xl,x2,x3,res;
-  vector signed int q;
+  vector float xl,x2,x3,res;
+  vector signed int q;
 
-  // Range reduction using : xl = angle * TwoOverPi;
-  //
-  xl = spu_mul(x, spu_splats(0.63661977236f));
+  // Range reduction using : xl = angle * TwoOverPi;
+  //
+  xl = spu_mul(x, spu_splats(0.63661977236f));
 
-  // Find the quadrant the angle falls in
-  // using: q = (int) (ceil(abs(x))*sign(x))
-  //
-  xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
-  q = spu_convts(xl,0);
+  // Find the quadrant the angle falls in
+  // using: q = (int) (ceil(abs(x))*sign(x))
+  //
+  xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
+  q = spu_convts(xl,0);
 
-  // Remainder in range [-pi/4..pi/4]
-  //
-  vec_float4 qf = spu_convtf(q,0);
-  vec_float4 p1 = spu_nmsub(qf,spu_splats(_TAN_KC1),x);
-  xl = spu_nmsub(qf,spu_splats(_TAN_KC2),p1);
+  // Remainder in range [-pi/4..pi/4]
+  //
+  vec_float4 qf = spu_convtf(q,0);
+  vec_float4 p1 = spu_nmsub(qf,spu_splats(__SINCOSF_KC1),x);
+  xl = spu_nmsub(qf,spu_splats(__SINCOSF_KC2),p1);
 
-  // Compute x^2 and x^3
-  //
-  x2 = spu_mul(xl,xl);
-  x3 = spu_mul(x2,xl);
+  // Compute x^2 and x^3
+  //
+  x2 = spu_mul(xl,xl);
+  x3 = spu_mul(x2,xl);
 
-  // Compute both the sin and cos of the angles
-  // using a polynomial expression:
-  //   cx = 1.0f + x2 * (C0 * x2 + C1), and
-  //   sx = xl + x3 * S0
-  //
-  vec_float4 ct2 = spu_madd(spu_splats( 0.0097099364f),x2,spu_splats(-0.4291161787f));
+  // Compute both the sin and cos of the angles
+  // using a polynomial expression:
+  //   cx = 1.0f + x2 * (C0 * x2 + C1), and
+  //   sx = xl + x3 * S0
+  //
+  vec_float4 ct2 = spu_madd(spu_splats( 0.0097099364f),x2,spu_splats(-0.4291161787f));
 
-  vec_float4 cx = spu_madd(ct2,x2,spu_splats(1.0f));
-  vec_float4 sx = spu_madd(spu_splats(-0.0957822992f),x3,xl);
+  vec_float4 cx = spu_madd(ct2,x2,spu_splats(1.0f));
+  vec_float4 sx = spu_madd(spu_splats(-0.0957822992f),x3,xl);
 
-  // Compute both cx/sx and sx/cx
-  //
-  vec_float4 cxosx = divf4(cx,sx);
-  vec_float4 sxocx = divf4(sx,cx);
+  // Compute both cx/sx and sx/cx
+  //
+  vec_float4 cxosx = _divf4(cx,sx);
+  vec_float4 sxocx = _divf4(sx,cx);
 
-  vec_float4 ncxosx = (vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)cxosx);
+  vec_float4 ncxosx = (vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)cxosx);
 
-  // For odd numbered quadrants return -cx/sx , otherwise return
-  // sx/cx
-  //
-  vec_uchar16 mask = (vec_uchar16)spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0));
-  res = spu_sel(ncxosx,sxocx,mask);
+  // For odd numbered quadrants return -cx/sx , otherwise return
+  // sx/cx
+  //
+  vec_uint4 mask = spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0));
+  res = spu_sel(ncxosx,sxocx,mask);
 
-  return res;
+  return res;
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/truncd2.c b/Extras/simdmathlibrary/spu/simdmath/truncd2.h
similarity index 95%
rename from Extras/simdmathlibrary/spu/truncd2.c
rename to Extras/simdmathlibrary/spu/simdmath/truncd2.h
index 3d133c53d..a5201ab43 100644
--- a/Extras/simdmathlibrary/spu/truncd2.c
+++ b/Extras/simdmathlibrary/spu/simdmath/truncd2.h
@@ -28,11 +28,14 @@
 POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef ___SIMD_MATH_TRUNCD2_H___
+#define ___SIMD_MATH_TRUNCD2_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector double
-truncd2(vector double in)
+static inline vector double
+_truncd2(vector double in)
 {
   vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
   vec_int4 exp, shift;
@@ -60,4 +63,4 @@ truncd2(vector double in)
 
   return (out);
 }
-
+#endif
diff --git a/Extras/simdmathlibrary/spu/truncf4.c b/Extras/simdmathlibrary/spu/simdmath/truncf4.h
similarity index 78%
rename from Extras/simdmathlibrary/spu/truncf4.c
rename to Extras/simdmathlibrary/spu/simdmath/truncf4.h
index 11ef30649..b51916014 100644
--- a/Extras/simdmathlibrary/spu/truncf4.c
+++ b/Extras/simdmathlibrary/spu/simdmath/truncf4.h
@@ -27,22 +27,26 @@
 POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef ___SIMD_MATH_TRUNCF4_H___
+#define ___SIMD_MATH_TRUNCF4_H___
+
 #include <simdmath.h>
 #include <spu_intrinsics.h>
 
-vector float
-truncf4 (vector float x)
+static inline vector float
+_truncf4 (vector float x)
 {
-  vector signed int xi;
-  vector unsigned int inrange;
+  vector signed int xi;
+  vector unsigned int inrange;
 
-  // Can convert to and from signed integer to truncate values in range [-2^31, 2^31).
-  // However, no truncation needed if exponent > 22.
+  // Can convert to and from signed integer to truncate values in range [-2^31, 2^31).
+  // However, no truncation needed if exponent > 22.
 
-  inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
+  inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
 
-  xi = spu_convts( x, 0 );
+  xi = spu_convts( x, 0 );
 
-  return spu_sel( x, spu_convtf( xi, 0 ), inrange );
+  return spu_sel( x, spu_convtf( xi, 0 ), inrange );
 }
+
+#endif
diff --git a/Extras/simdmathlibrary/spu/sincosf4.c b/Extras/simdmathlibrary/spu/sincosf4.c
deleted file mode 100644
index 0a4fc9d94..000000000
--- a/Extras/simdmathlibrary/spu/sincosf4.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* sincosf4
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <simdmath.h>
-#include <spu_intrinsics.h>
-#include "sincos_c.h"
-
-//
-// Computes both the sine and cosine of the all four slots of x
-// by using a polynomial approximation.
-//
-void sincosf4 (vector float x, vector float *s, vector float *c)
-{
-  vec_float4 xl,xl2,xl3;
-  vec_int4 q;
-  vec_int4 offsetSin, offsetCos;
-
-  // Range reduction using : xl = angle * TwoOverPi;
-  //
-  xl = spu_mul(x, spu_splats(0.63661977236f));
-
-  // Find the quadrant the angle falls in
-  // using: q = (int) (ceil(abs(xl))*sign(xl))
-  //
-  xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
-  q = spu_convts(xl,0);
-
-
-  // Compute the offset based on the quadrant that the angle falls in.
-  // Add 1 to the offset for the cosine.
-  //
-  offsetSin = spu_and(q,spu_splats((int)0x3));
-  offsetCos = spu_add(spu_splats(1),offsetSin);
-
-  // Remainder in range [-pi/4..pi/4]
-  //
-  vec_float4 qf = spu_convtf(q,0);
-  vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
-  xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
-
-  // Compute x^2 and x^3
-  //
-  xl2 = spu_mul(xl,xl);
-  xl3 = spu_mul(xl2,xl);
-
-
-  // Compute both the sin and cos of the angles
-  // using a polynomial expression:
-  //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
-  //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
-  //
-  vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
-  vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
-
-  vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
-  vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
-
-  vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
-  vec_float4 sx = spu_madd(st2,xl3,xl);
-
-  // Use the cosine when the offset is odd and the sin
-  // when the offset is even
-  //
-  vec_uchar16 sinMask = (vec_uchar16)spu_cmpeq(spu_and(offsetSin,(int)0x1),spu_splats((int)0));
-  vec_uchar16 cosMask = (vec_uchar16)spu_cmpeq(spu_and(offsetCos,(int)0x1),spu_splats((int)0));
-  *s = spu_sel(cx,sx,sinMask);
-  *c = spu_sel(cx,sx,cosMask);
-
-  // Flip the sign of the result when (offset mod 4) = 1 or 2
-  //
-  sinMask = (vec_uchar16)spu_cmpeq(spu_and(offsetSin,(int)0x2),spu_splats((int)0));
-  cosMask = (vec_uchar16)spu_cmpeq(spu_and(offsetCos,(int)0x2),spu_splats((int)0));
-
-  *s = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)*s),*s,sinMask);
-  *c = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)*c),*c,cosMask);
-
-}
-
diff --git a/Extras/simdmathlibrary/spu/sind2.c b/Extras/simdmathlibrary/spu/sind2.c
deleted file mode 100644
index 5df3417ac..000000000
--- a/Extras/simdmathlibrary/spu/sind2.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* sind2
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <simdmath.h>
-#include <spu_intrinsics.h>
-#include "sincos_c.h"
-
-//
-// Computes the sine of the each of two double slots.
-//
-vector double sind2 (vector double x)
-{
-  vec_double2 xl,xl2,xl3,res;
-  vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
-  vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
-  vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
-
-  // Range reduction using : x = angle * TwoOverPi;
-  //
-  xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
-
-  // Find the quadrant the angle falls in
-  // using: q = (int) (ceil(abs(x))*sign(x))
-  //
-  xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
-  vec_float4 xf = spu_roundtf(xl);
-  vec_int4 q = spu_convts(xf,0);
-  q = spu_shuffle(q,q,copyEven);
-
-
-  // Compute an offset based on the quadrant that the angle falls in
-  //
-  vec_int4 offset = spu_and(q,spu_splats(0x3));
-
-  // Remainder in range [-pi/4..pi/4]
-  //
-  vec_float4 qf = spu_convtf(q,0);
-  vec_double2 qd = spu_extend(qf);
-  vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
-  xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
-
-  // Check if |xl| is a really small number
-  //
-  vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
-  vec_ullong2 isTiny = (vec_ullong2)isgreaterd2 (tiny,absXl);
-
-  // Compute x^2 and x^3
-  //
-  xl2 = spu_mul(xl,xl);
-  xl3 = spu_mul(xl2,xl);
-
-  // Compute both the sin and cos of the angles
-  // using a polynomial expression:
-  //   cx = 1.0f + xl2 * (((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
-  //   sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
-  //
-
-  vec_double2 ct0 = spu_mul(xl2,xl2);
-  vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
-  vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
-  vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
-  vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
-  vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
-  vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
-  vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
-  vec_double2 st4 = spu_madd(st2,ct0,st3);
-  vec_double2 ct5 = spu_mul(ct0,ct0);
-
-  vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
-  vec_double2 st6 = spu_madd(ct5,st1,st4);
-
-  vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
-  vec_double2 sx = spu_madd(st6,xl3,xl);
-
-  // Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
-  //
-  sx = spu_sel(sx,xl,isTiny);
-  cx = spu_sel(cx,spu_splats(1.0),isTiny);
-
-  // Use the cosine when the offset is odd and the sin
-  // when the offset is even
-  //
-  vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
-  res = spu_sel(cx,sx,mask1);
-
-  // Flip the sign of the result when (offset mod 4) = 1 or 2
-  //
-  vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
-  mask2 = spu_shuffle(mask2,mask2,copyEven);
-  res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
-
-  // if input = +/-Inf return NAN
-  //
-  res = spu_sel(res, nan, isnand2 (x));
-
-  // if input = 0 or denorm return input
-  //
-  vec_ullong2 zeroMask = is0denormd2 (x);
-  res = spu_sel(res,x,zeroMask);
-
-
-  return res;
-}
-
diff --git a/Extras/simdmathlibrary/spu/sinf4.c b/Extras/simdmathlibrary/spu/sinf4.c
deleted file mode 100644
index 6ad160825..000000000
--- a/Extras/simdmathlibrary/spu/sinf4.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* sinf4
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <simdmath.h>
-#include <spu_intrinsics.h>
-#include "sincos_c.h"
-
-//
-// Computes the sine of each of the four slots by using a polynomia approximation
-//
-vector float sinf4 (vector float x)
-{
-  vec_float4 xl,xl2,xl3,res;
-  vec_int4 q;
-
-  // Range reduction using : xl = angle * TwoOverPi;
-  //
-  xl = spu_mul(x, spu_splats(0.63661977236f));
-
-  // Find the quadrant the angle falls in
-  // using: q = (int) (ceil(abs(xl))*sign(xl))
-  //
-  xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
-  q = spu_convts(xl,0);
-
-
-  // Compute an offset based on the quadrant that the angle falls in
-  //
-  vec_int4 offset = spu_and(q,spu_splats((int)0x3));
-
-  // Remainder in range [-pi/4..pi/4]
-  //
-  vec_float4 qf = spu_convtf(q,0);
-  vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
-  xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
-
-  // Compute x^2 and x^3
-  //
-  xl2 = spu_mul(xl,xl);
-  xl3 = spu_mul(xl2,xl);
-
-
-  // Compute both the sin and cos of the angles
-  // using a polynomial expression:
-  //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
-  //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
-  //
-  vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
-  vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
-
-  vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
-  vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
-
-  vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
-  vec_float4 sx = spu_madd(st2,xl3,xl);
-
-  // Use the cosine when the offset is odd and the sin
-  // when the offset is even
-  //
-  vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
-  res = spu_sel(cx,sx,mask1);
-
-  // Flip the sign of the result when (offset mod 4) = 1 or 2
-  //
-  vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
-  res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
-
-  return res;
-
-}
-
diff --git a/Extras/simdmathlibrary/spu/sqrtd2.c b/Extras/simdmathlibrary/spu/sqrtd2.c
deleted file mode 100644
index c08f4754e..000000000
--- a/Extras/simdmathlibrary/spu/sqrtd2.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* sqrtd2 - for each of two double slots, compute square root.
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <simdmath.h>
-#include <spu_intrinsics.h>
-
-
-//
-// Handles exceptional values as follows:
-// NaN -> NaN
-// -Inf -> Nan
-// -Finite -> Nan
-// Denormal inputs are treated as zero.
-
-vector double
-sqrtd2 (vector double x)
-{
-  vec_ullong2 expmask, onemask, signmask, evenexp;
-  vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result;
-  vec_float4 halff, onef, manf, y0f, y1f;
-
-  expmask = spu_splats(0x7ff0000000000000ull);
-  onemask = spu_splats(0x0010000000000000ull);
-  signmask = spu_splats(0x8000000000000000ull);
-  onef = spu_splats(1.0f);
-  one = spu_extend( onef );
-  halff = spu_splats(0.5f);
-  half = spu_extend( halff );
-
-  // First compute reciprocal square root.
-  // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
-  // where i = 0 when exponent is even and i = 1 when exponent is odd.
-  //
-  // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
-  //
-  // biased_exp = 1023 + exponent
-  // new_biased_exp = 1023 - (exponent+i)/2
-  //                = 1023 - (biased_exp-1023+i)/2
-  //                = (3069 - (biased_exp+i)) / 2
-
-  evenexp = spu_and( (vec_ullong2)x, onemask );
-  man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );
-
-  exp = spu_and( x, (vec_double2)expmask );
-  nexp = spu_or( exp, (vec_double2)onemask );
-  nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );
-
-  // Compute mantissa part in single precision.
- // Convert back to double and multiply with 2^(-(exponent+i)/2), then - // do two Newton-Raphson steps for full precision. - - manf = spu_roundtf( man ); - y0f = spu_rsqrte( manf ); - y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f ); - y1 = spu_mul( spu_extend( y1f ), nexp ); - y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 ); - y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 ); - - // Multiply by input to get square root. - - y3 = spu_mul( y3, x ); - - // Choose iterated result or special value. - - zero = spu_and( x, (vec_double2)signmask ); - inf = (vec_double2)expmask; - nan = (vec_double2)spu_splats(0x7ff8000000000000ull); - - result = spu_sel( y3, inf, isinfd2 ( x ) ); - result = spu_sel( result, nan, isnand2 ( x ) ); - result = spu_sel( result, zero, is0denormd2 ( x ) ); - - return result; -} - diff --git a/Extras/simdmathlibrary/spu/tand2.c b/Extras/simdmathlibrary/spu/tand2.c deleted file mode 100644 index 479436d31..000000000 --- a/Extras/simdmathlibrary/spu/tand2.c +++ /dev/null @@ -1,117 +0,0 @@ -/* tand2 - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
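The refinement scheme in the sqrtd2 comments above has a simple scalar picture: given a rough reciprocal-square-root estimate y of x, each Newton-Raphson step y' = y + (y/2)(1 - x*y*y) roughly doubles the number of correct bits, and sqrt(x) = x * rsqrt(x). A minimal sketch under stated assumptions: the starting estimate is faked by rounding through single precision (the SPU code uses spu_rsqrte), the special-value selects at the end of the deleted code are omitted, and sqrt_sketch is a made-up name.

    #include <math.h>
    static double sqrt_sketch(double x)
    {
        double y = 1.0 / (double)sqrtf((float)x); /* stand-in for the spu_rsqrte estimate */
        y = y + 0.5 * y * (1.0 - x * y * y);      /* Newton-Raphson step 1                */
        y = y + 0.5 * y * (1.0 - x * y * y);      /* Newton-Raphson step 2                */
        return x * y;                             /* sqrt(x) = x * rsqrt(x)               */
    }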
- */ - -#include <simdmath.h> -#include <spu_intrinsics.h> - -#define _TAND2_CC0 -0.00020844956382258822 -#define _TAND2_CC1 0.02334489464693293380 -#define _TAND2_CC2 -0.46161689768996201755 -#define _TAND2_SC0 -0.00000748373924372997 -#define _TAND2_SC1 0.00280592875035233052 -#define _TAND2_SC2 -0.12828356435663158978 -#define _TAND2_KC1 (13176794.0 / 8388608.0) -#define _TAND2_KC2 7.5497899548918821691639751442098584e-8 - -/* - * Computes the tangent of the given angles by first reducing the - * range to [-pi/4..pi/4] and performing the appropriate sin/cos ratio - */ -vector double -tand2 (vector double x) -{ - vec_double2 xl,x2,x3,res; - vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull); - vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b }; - - // Range reduction using: xl = angle * TwoOverPi; - // - xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744)); - - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull))); - vec_float4 xf = spu_roundtf(xl); - vec_int4 q = spu_convts(xf,0); - q = spu_shuffle(q,q,copyEven); - - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = spu_convtf(q,0); - vec_double2 qd = spu_extend(qf); - vec_double2 p1 = spu_nmsub(qd,spu_splats(_TAND2_KC1),x); - xl = spu_nmsub(qd,spu_splats(_TAND2_KC2),p1); - - // Compute x^2 and x^3 - // - x2 = spu_mul(xl,xl); - x3 = spu_mul(x2,xl); - - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + x2 * ((C0 * x2 + C1) * x2 + C2), and - // sx = x + x3 * ((S0 * x2 + S1) * x2 + S2) - // - vec_double2 ct1 = spu_madd(spu_splats(_TAND2_CC0),x2,spu_splats(_TAND2_CC1)); - vec_double2 st1 = spu_madd(spu_splats(_TAND2_SC0),x2,spu_splats(_TAND2_SC1)); - - vec_double2 ct2 = spu_madd(ct1,x2,spu_splats(_TAND2_CC2)); - vec_double2 st2 = spu_madd(st1,x2,spu_splats(_TAND2_SC2)); - - vec_double2 cx = spu_madd(ct2,x2,spu_splats(1.0)); - vec_double2 sx = spu_madd(st2,x3,xl); - - - // Compute both cx/sx and sx/cx - // - vec_double2 cxosx = divd2(cx,sx); - vec_double2 sxocx = divd2(sx,cx); - - vec_double2 ncxosx = (vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)cxosx); - - // For odd numbered quadrants return -cx/sx, otherwise return - // sx/cx - // - vec_ullong2 mask = (vec_ullong2)spu_cmpeq(spu_and(q,(int)0x1),spu_splats((int)0)); - res = spu_sel(ncxosx,sxocx,mask); - - // If input = +/-Inf return NAN - // - res = spu_sel(res,nan,isinfd2 (x)); - - // If input = 0 or denorm return input - // - res = spu_sel(res,x, is0denormd2 (x)); - - return res; -} diff --git a/Extras/simdmathlibrary/spu/tests/Makefile b/Extras/simdmathlibrary/spu/tests/Makefile index 014fb4345..904571b33 100644 --- a/Extras/simdmathlibrary/spu/tests/Makefile +++ b/Extras/simdmathlibrary/spu/tests/Makefile @@ -44,7 +44,7 @@ TESTS = fabsd2 fabsf4 truncf4 divf4 recipd2 divd2 sqrtf4 \ ALL_TESTS = $(TESTS) -INCLUDES_SPU = -I../../ +INCLUDES_SPU = -I../../common CROSS_SPU = spu- AR_SPU = $(CROSS_SPU)ar @@ -52,7 +52,7 @@ CC_SPU = $(CROSS_SPU)gcc CXX_SPU = $(CROSS_SPU)g++ TEST_CMD_SPU = -CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall +CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall LDFLAGS_SPU=-L../ -l$(LIB_BASE) -lm MAKE_DEFS = \ @@ -99,21 +99,8 @@ check: $(ALL_TESTS) ../$(STATIC_LIB): cd ../;$(MAKE) $(MAKE_DEFS) $(STATIC_LIB) -%.o: %.c +%.o: %.c ../../common/common-test.h testutils.h $(CC_SPU) $(CFLAGS_SPU) -c $< -#---------- -# C++ -#---------- -%.o: %.C - 
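The quadrant handling in the deleted tand2 above reduces to the identity tan(q*pi/2 + r) = tan(r) for even q and -cot(r) = -cos(r)/sin(r) for odd q, which is why the code forms both sx/cx and cx/sx and selects between them. A scalar illustration only; tan_sketch is a made-up name, and the real code uses its own polynomial sin/cos and divd2 rather than libm:

    #include <math.h>
    /* r: reduced angle in [-pi/4, pi/4], q: quadrant index from range reduction */
    static double tan_sketch(double r, int q)
    {
        return (q & 1) ? -(cos(r) / sin(r)) : (sin(r) / cos(r));
    }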
$(CXX_SPU) $(CFLAGS_SPU) -c $< - -%.o: %.cpp - $(CXX_SPU) $(CFLAGS_SPU) -c $< - %.o: %.cc $(CXX_SPU) $(CFLAGS_SPU) -c $< - -%.o: %.cxx - $(CXX_SPU) $(CFLAGS_SPU) -c $< - diff --git a/Extras/simdmathlibrary/spu/tests/common-test.h b/Extras/simdmathlibrary/spu/tests/common-test.h deleted file mode 100644 index 4f931deca..000000000 --- a/Extras/simdmathlibrary/spu/tests/common-test.h +++ /dev/null @@ -1,201 +0,0 @@ -/* SIMD math library - common testsuite part for SPU - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <stdio.h> -static inline unsigned int clock() -{ - unsigned int ret; - int tmp = 0; - __asm __volatile__ ( "syscall %0,%1,0x2b\n" - : "=r" (ret) - : "r" (tmp) - : "memory" ); - return (ret); -} -// Test files begin with TEST_SET_START("unique set id","your initials","test set description") -// Individual tests begin with TEST_START("name of test") -// and end with TEST_PASS(), TEST_FAIL("reason for failure") or TEST_CHECK() -// Or you can run a test encapsulated in a function with: -// TEST_FUNCTION("name of test", function(), "reason for failure") -// -// The clock starts when you call TEST_START and stops with TEST_PASS, TEST_FAIL or TEST_CHECK -// After a start there can be several PASS, FAIL or CHECK calls, each one counts as a test, time is measured from the prior call -// -char - *__initials, // Test owner's initials - *__description, // short descriptive name for this test set - *__name, // name of the currently running test - *__set_id; // id of the test set -int -// __zip=0, - __success=1, // set to 0 if any tests failed - __count, // Total number of tests run - __passed; // Total number of tests passed -unsigned int - __ttemp, - __time, // For timing tests (usually start time of last test) - __ttime; // Cumulative test runtime NOT counting runtime of the TEST macros - -// TEST_SET_START -// Call at the start of a set of related tests to identify them -// Prints a "start of set banner message" -// set_id - unique test set identifier, a time in the format yyyymmddhhmmss followed by your initials i.e.: 20040716104615GAC -// initials - your initials -// description - brief descriptive name for this test set -#define TEST_SET_START(set_id,initials,description) \ - do { \ - __set_id=set_id; \ - __initials=initials; \ - __description=description; \ - __count=0; \ - __passed=0; \ - __time=0; \ - __ttime=0; \ - printf("0\t%s\t%d\t%s\tSTART\tpassed\ttotal\ttime\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,__set_id, __description); \ - } while(0) - -// TEST_START -// Begins a test, and starts the clock -// name - brief name for this test -#define TEST_START(name) \ - do { \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - __name=name; \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - } while(0) - -// TEST_PASS -// Indicates the test passed -// test_id - unique test ID number, same format as the set_id number -// This should match the id provided to the matching TEST_FAIL call -#define TEST_PASS(test_id) \ - do { \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ - __time=__ttemp-__time; \ - __ttime+=__time; \ - __count++; \ - __passed++; \ - printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - } while(0) - -// __time=clock(); -// __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (__zip) : "memory" ); -// __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (__zip) : "memory" ); - -// TEST_FAIL -// Indicates the test failed -// test_id - unique test ID number, same format as the set_id number -// This should match the id provided to the matching TEST_PASS call -// why - brief description of why it failed -#define TEST_FAIL(test_id,why,error_code) \ - do { \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ - 
__time=__ttemp-__time; \ - __ttime+=__time; \ - __count++; \ - __success=0; \ - printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,error_code); \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - } while(0) - -// TEST_CHECK -// Passes or fails the test after evaluating the "test" argument (just like assert but without terminating the program) -// The clock is immediately stopped so the time required to evaluate "test" will NOT be included in the reported time -// If the test failed, the reason will be printed as FAILED BECAUSE: check (value of "test") failed -// test_id - unique test ID number, same format as the set_id number -// test - expression evaluating to true/false -#define TEST_CHECK(test_id,test,error_code) \ - do { \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ - __time=__ttemp-__time; \ - __ttime+=__time; \ - __count++; \ - if(test) \ - { \ - __passed++; \ - printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ - } \ - else \ - { \ - __success=0; \ - printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: check %s failed\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,#test,error_code); \ - } \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - } while(0) - -// TEST_FUNCTION -// Runs a test encapsulated in a function that returns 0 if the test passed and an error number if it failed -// The clock is started on calling the function and stopped as soon as it returns so the branching logic will not be included in the time -// test_id - unique test ID number, same format as the set_id number -// name - brief name for the test -// func - function invocation (should include parentheses, may have arguments) -// why - brief description to print if the test fails -#define TEST_FUNCTION(test_id,name,func,why) \ - do { \ - TEST_START(name); \ - int result=func; \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__ttemp) : "r" (0) : "memory" ); \ - __time=__ttemp-__time; \ - __ttime+=__time; \ - __count++; \ - if(result==0) \ - { \ - __passed++; \ - printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%d\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \ - } \ - else \ - { \ - __success=0; \ - printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%d\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,result); \ - } \ - __asm __volatile__ ( "syscall %0,%1,0x2b\n" : "=r" (__time) : "r" (0) : "memory" ); \ - } while(0) - -// TEST_SET_DONE -// Ends a set of tests, prints out the closing banner (OK if all tests pass, PROBLEM if any fail) -// Also prints count of tests passed, tests run and total time -#define TEST_SET_DONE() \ - do { \ - printf("9\t%s\t%d\t%s\t%s\t%d\t%d\t%d\t%s\tunique test id \t%s\n",__FILE__,__LINE__,__initials,(__count==__passed)?"OK":"PROBLEM",__passed,__count,__ttime,__set_id,__description); \ - } while(0) - -// TEST_EXIT -// Call this ONCE at the very end of the test program, it calls "exit" to return -// EXIT_SUCCESS if all tests passed or EXIT_FAILURE if any tests failed. 
-// This allows the makefile/shell script running the tests to know which ones failed -#define TEST_EXIT() \ - do { \ - if(__success) \ - exit(0); \ - else \ - exit(-1); \ - } while (0) diff --git a/Extras/simdmathlibrary/spu/tests/divd2.c b/Extras/simdmathlibrary/spu/tests/divd2.c index 9e796c94b..66a6dca63 100644 --- a/Extras/simdmathlibrary/spu/tests/divd2.c +++ b/Extras/simdmathlibrary/spu/tests/divd2.c @@ -59,13 +59,13 @@ int main() unsigned long long i8d = 0x61f25e39867b0a9eull; unsigned long long i8r = 0x1403088aa08482f2ull; - double x0n = hide_double(-1.0/0.0); // -Inf/ Inf == NaN - double x0d = hide_double(1.0/0.0); + double x0n = hide_double(-HUGE_VAL); // -Inf/ Inf == NaN + double x0d = hide_double(HUGE_VAL); double x1n = hide_double(0.0); // 0 / 0 == NaN double x1d = hide_double(-0.0); - double x2n = hide_double(0.0/0.0); // NaN / 2 == NaN + double x2n = hide_double(nan("")); // NaN / 2 == NaN double x2d = hide_double(2.0); double x3n = hide_double(make_double(i3n)); diff --git a/Extras/simdmathlibrary/spu/tests/fabsd2.c b/Extras/simdmathlibrary/spu/tests/fabsd2.c index ac74c63e7..11e366dd4 100644 --- a/Extras/simdmathlibrary/spu/tests/fabsd2.c +++ b/Extras/simdmathlibrary/spu/tests/fabsd2.c @@ -46,8 +46,8 @@ int main() double x1p = hide_double(83532.96153153); double x2n = hide_double(-0.0000000013152); double x2p = hide_double(0.0000000013152); - double x3n = hide_double(-1.0/0.0); - double x3p = hide_double(1.0/0.0); + double x3n = hide_double(-HUGE_VAL); + double x3p = hide_double(HUGE_VAL); vec_double2 x0n_v = spu_splats(x0n); vec_double2 x0p_v = spu_splats(x0p); diff --git a/Extras/simdmathlibrary/spu/tests/fdimd2.c b/Extras/simdmathlibrary/spu/tests/fdimd2.c index d1b7e6e7f..d71a48ef2 100644 --- a/Extras/simdmathlibrary/spu/tests/fdimd2.c +++ b/Extras/simdmathlibrary/spu/tests/fdimd2.c @@ -76,16 +76,16 @@ int main() double x5min = hide_double(5.0e-324); double x5max = hide_double(1.0e-323); - double x5dim = hide_double(1.0e-323 - 5.0e-324); + double x5dim = hide_double(1.0e-323) - hide_double(5.0e-324); double x6min = hide_double(DBL_MAX); - double x6max = hide_double(1.0/0.0); + double x6max = hide_double(HUGE_VAL); - double x7min = hide_double(-1.0/0.0); + double x7min = hide_double(-HUGE_VAL); double x7max = hide_double(19355.03); - double x8min = hide_double(-1.0/0.0); - double x8max = hide_double(1.0/0.0); + double x8min = hide_double(-HUGE_VAL); + double x8max = hide_double(HUGE_VAL); vec_double2 x0min_v = spu_splats(x0min); vec_double2 x0max_v = spu_splats(x0max); diff --git a/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h b/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h deleted file mode 100644 index 3ed3a3491..000000000 --- a/Extras/simdmathlibrary/spu/tests/floatingpoint_tests.h +++ /dev/null @@ -1,173 +0,0 @@ -/* Common part of testsuite for SPU SIMD Math library - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
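The deleted common-test.h macros above form a small reporting protocol: tab-separated printf records per test, and an exit status that, per the TEST_EXIT comment, is what the Makefile's check target relies on. A hypothetical test file would use them as follows; every id, name, and value here is invented for illustration, and the file only builds with the SPU toolchain because the macros time tests through an SPU syscall.

    #include <stdio.h>
    #include <stdlib.h>
    #include "common-test.h"

    int main()
    {
        TEST_SET_START("20070101000000XYZ", "XYZ", "example test set");

        TEST_START("one plus one");
        TEST_CHECK("20070101000001XYZ", 1 + 1 == 2, 0); /* counts as one PASS */

        TEST_SET_DONE(); /* prints the OK/PROBLEM banner with pass counts and time */
        TEST_EXIT();     /* exit(0) if everything passed, exit(-1) otherwise       */
    }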
- * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - */ - - - - -#ifndef _FLOATINGPOINT_TESTS_H_ -#define _FLOATINGPOINT_TESTS_H_ - -#if __PPC__ - #include <altivec.h> - #define vec_uchar16 vector unsigned char - #define vec_char16 vector signed char - #define vec_ushort8 vector unsigned short - #define vec_short8 vector signed short - #define vec_uint4 vector unsigned int - #define vec_int4 vector signed int - #define vec_ullong2 vector unsigned long long - #define vec_llong2 vector signed long long - #define vec_float4 vector float - #define vec_double2 vector double -#else - #if __SPU__ - #include <spu_intrinsics.h> - #endif -#endif - -// To avoid type punning warnings (for printing in hex notation, doing bit-diff etc.) -typedef union { - double d; - unsigned char uc[8]; - unsigned int ui[2]; - unsigned long long int ull; -} sce_math_alt_double; - -typedef union { - float f; - unsigned char uc[4]; - unsigned int ui; -} sce_math_alt_float; - -#if (__PPC__ || __SPU__) -typedef union { - vec_int4 vsi; - int si[4]; -} sce_math_alt_vec_int4; - -typedef union { - vec_uint4 vui; - int ui[4]; -} sce_math_alt_vec_uint4; - -typedef union { - vec_float4 vf; - float sf[4]; - unsigned int ui[4]; -} sce_math_alt_vec_float4; -#endif -#if __SPU__ - typedef union { - double sd[2]; - vec_double2 vd; - unsigned long long int ui[2]; - } sce_math_alt_vec_double2; -#endif - -#if __PPC__ - static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = vec_sub(refi, valsi); - vec_int4 negdiff = vec_sub(((vec_int4)0), diff); - - return vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4)0) )); - } - static inline int bitdiff(float ref, float val) { - sce_math_alt_float aref, aval; - aref.f = ref; - aval.f = val; - int diff = aref.ui - aval.ui; - return (diff>0)?diff:-diff; - } - static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = vec_sub(refi, valsi); - vec_int4 negdiff = vec_sub(((vec_int4)0), diff); - - diff = vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4)0) )); - vec_float4 logdiff = vec_loge(vec_ctf(diff,0)); - return vec_sub(((vec_int4)32), vec_cts(vec_ceil(logdiff),0)); - } - static inline int bitmatch(float ref, float val) { - sce_math_alt_vec_float4 aref, aval; - sce_math_alt_vec_int4 adiff; - aref.sf[0] = ref; - aval.sf[0] = val; - adiff.vsi = bitmatch4(aref.vf, aval.vf); - return adiff.si[0]; - } -#else - #if __SPU__ - static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - 
vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = spu_sub(refi, valsi); - vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); - - return spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)); - } - static inline int bitdiff(float ref, float val) { - return spu_extract(bitdiff4(spu_promote(ref,0), spu_promote(val,0)), 0); - } - static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) { - vec_int4 refi = (vec_int4)ref; - vec_int4 valsi = (vec_int4)vals; - vec_int4 diff = spu_sub(refi, valsi); - vec_int4 negdiff = spu_sub(spu_splats((int)0), diff); - - return (vec_int4)spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0))); - } - static inline int bitmatch(float ref, float val) { - return spu_extract(bitmatch4(spu_promote(ref,0), spu_promote(val,0)), 0); - } - - #else - inline int bitdiff(sce_math_alt_float ref, sce_math_alt_float val) { - int diff = ref.ui - val.ui; - return((diff>0)?diff:-diff); - } - inline int bitmatch(sce_math_alt_float ref, sce_math_alt_float val) { - int diff, i; - unsigned int udiff; - diff = ref.ui - val.ui; - udiff = (diff>0) ? diff : -diff; - i = 32; - while(udiff != 0) { - i = i-1; - udiff = udiff >> 1; - } - return udiff; - } - #endif // __SPU__ -#endif // __PPC__ - - -#endif // _FLOATINGPOINT_TESTS_H_ diff --git a/Extras/simdmathlibrary/spu/tests/fmad2.c b/Extras/simdmathlibrary/spu/tests/fmad2.c index 49a1ad3c7..2c3ea6a49 100644 --- a/Extras/simdmathlibrary/spu/tests/fmad2.c +++ b/Extras/simdmathlibrary/spu/tests/fmad2.c @@ -76,7 +76,7 @@ int main() double z2 = hide_double(-0.0); double x3 = hide_double(1.0); - double y3 = hide_double(1.0/0.0); + double y3 = hide_double(HUGE_VAL); double z3 = hide_double(-1.0); double x4 = norm_max; diff --git a/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c b/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c index 18f6402b3..bfaed2067 100644 --- a/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c +++ b/Extras/simdmathlibrary/spu/tests/fmind2_fmaxd2.c @@ -81,13 +81,13 @@ int main() double x5max = hide_double(1.0e-323); double x6min = norm_max; - double x6max = hide_double(1.0/0.0); + double x6max = hide_double(HUGE_VAL); - double x7min = hide_double(-1.0/0.0); + double x7min = hide_double(-HUGE_VAL); double x7max = hide_double(19355.03); - double x8min = hide_double(-1.0/0.0); - double x8max = hide_double(1.0/0.0); + double x8min = hide_double(-HUGE_VAL); + double x8max = hide_double(HUGE_VAL); double x9min = denorm_max; double x9max = norm_min; diff --git a/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c b/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c index 5ca3d6300..b7206bb07 100644 --- a/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c +++ b/Extras/simdmathlibrary/spu/tests/fpclassifyd2.c @@ -45,7 +45,7 @@ int main() long long r0 = FP_NAN; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); long long r1 = FP_INFINITE; // -Dmax @@ -97,7 +97,7 @@ int main() long long r13 = FP_NORMAL; // +Inf - double x14 = hide_double( 1.0/0.0); + double x14 = hide_double(HUGE_VAL); long long r14 = FP_INFINITE; //+Nan @@ -117,11 +117,11 @@ int main() vec_llong2 r18_v = (vec_llong2) {FP_NAN, FP_NORMAL}; // Compound - vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x19_v = (vec_double2) {HUGE_VAL, -nan("") }; vec_llong2 r19_v = (vec_llong2) {FP_INFINITE, FP_NAN}; // Compound - vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ; + vec_double2 x20_v = (vec_double2) { -1.0e-999, -HUGE_VAL} ; vec_llong2 r20_v = (vec_llong2) {FP_ZERO, FP_INFINITE}; vec_double2 
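The floatingpoint_tests.h helpers just removed compare floats by the distance between their integer bit patterns: bitdiff is the absolute difference of the two representations, and bitmatch converts that distance into a count of agreeing leading bits (32 for identical values). Note that the plain-C fallback in the deleted header returns udiff, which is always 0 after its loop; returning the counter i was presumably intended. A scalar sketch of that intended behavior, with made-up names and memcpy in place of the union punning:

    #include <string.h>
    static int bitdiff_sketch(float ref, float val)
    {
        unsigned int a, b;
        memcpy(&a, &ref, sizeof a);
        memcpy(&b, &val, sizeof b);
        int d = (int)(a - b);
        return d > 0 ? d : -d;              /* distance in representation space */
    }
    static int bitmatch_sketch(float ref, float val)
    {
        unsigned int d = (unsigned int)bitdiff_sketch(ref, val);
        int bits = 32;                      /* 32 leading bits agree when identical */
        while (d != 0) { bits--; d >>= 1; } /* mirrors the SPU count-leading-zeros  */
        return bits;
    }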
x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c b/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c index c0691648f..e1cdc8d7b 100644 --- a/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c +++ b/Extras/simdmathlibrary/spu/tests/fpclassifyf4.c @@ -40,11 +40,11 @@ int main() TEST_SET_START("20060828000000AAN","AAN", "fpclassifyf4"); // -Nan - float x0 = hide_float(-nan("")); + float x0 = hide_float(-NANF); int r0 = FP_NORMAL; // -Inf - float x1 = hide_float(-1.0/0.0); + float x1 = hide_float(-HUGE_VALF); int r1 = FP_NORMAL; // -Smax @@ -96,11 +96,11 @@ int main() int r13 = FP_NORMAL; // +Inf - float x14 = hide_float( 1.0/0.0); + float x14 = hide_float(HUGE_VALF); int r14 = FP_NORMAL; //+Nan - float x15 = hide_float( nan("")); + float x15 = hide_float(NANF); int r15 = FP_NORMAL; // Compound diff --git a/Extras/simdmathlibrary/spu/tests/frexpd2.c b/Extras/simdmathlibrary/spu/tests/frexpd2.c index f05e24b04..e7ddefd45 100644 --- a/Extras/simdmathlibrary/spu/tests/frexpd2.c +++ b/Extras/simdmathlibrary/spu/tests/frexpd2.c @@ -36,14 +36,6 @@ #include "common-test.h" #include "testutils.h" -#ifndef DBL_INF -#define DBL_INF ((long long)0x7FF0000000000000ull) -#endif - -#ifndef DBL_NAN -#define DBL_NAN ((long long)0x7FF8000000000000ull) -#endif - int main() { TEST_SET_START("20060907000000AAN","AAN", "frexpd2"); @@ -57,7 +49,7 @@ int main() //long long e0 = 0; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); double r1 = x1; //long long e1 = 0; @@ -172,7 +164,7 @@ int main() long long e23 = 1024; // +Inf - double x24 = hide_double( 1.0/0.0 ); + double x24 = hide_double(HUGE_VAL); double r24 = x24; //long long e24 = 0; @@ -199,14 +191,14 @@ int main() // Compound vec_llong2 keep29_v = exp_v; - vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") }; - vec_double2 r29_v = (vec_double2) { 1.0/0.0, nan("") }; + vec_double2 x29_v = (vec_double2) { HUGE_VAL, -nan("") }; + vec_double2 r29_v = (vec_double2) { HUGE_VAL, nan("") }; vec_llong2 e29_v = (vec_llong2) { spu_extract(exp_v, 0), spu_extract(exp_v, 1) }; // Compound vec_llong2 keep30_v = exp_v; - vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ; - vec_double2 r30_v = (vec_double2) { hide_double(make_double(0xBFE4FF632B6A83E4ull)), -1.0/0.0 }; + vec_double2 x30_v = (vec_double2) { -1.2e-99, -HUGE_VAL } ; + vec_double2 r30_v = (vec_double2) { hide_double(make_double(0xBFE4FF632B6A83E4ull)), -HUGE_VAL }; vec_llong2 e30_v = (vec_llong2) { -328, spu_extract(exp_v, 1) }; vec_llong2 keep0_v = exp_v; diff --git a/Extras/simdmathlibrary/spu/tests/frexpf4.c b/Extras/simdmathlibrary/spu/tests/frexpf4.c index 8654ba4f5..645b5cec3 100644 --- a/Extras/simdmathlibrary/spu/tests/frexpf4.c +++ b/Extras/simdmathlibrary/spu/tests/frexpf4.c @@ -47,7 +47,7 @@ int main() int e0 = 129; // -Norm (IEEE-754: -Inf) - float x1 = hide_float(-1.0/0.0); + float x1 = hide_float(-HUGE_VALF); float r1 = hide_float(make_float(0xBF7FFFFF)); int e1 = 129; @@ -162,7 +162,7 @@ int main() int e23 = 129; //+Norm (IEEE-754: +Inf) - float x24 = hide_float( 1.0/0.0); + float x24 = hide_float(HUGE_VALF); float r24 = hide_float(make_float(0x3F7FFFFF)); int e24 = 129; @@ -187,12 +187,12 @@ int main() vec_int4 e28_v = (vec_int4) { 129, 20, 0, 22 }; // Compound - vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; + vec_float4 x29_v = (vec_float4) { HUGE_VALF, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; vec_float4 r29_v = (vec_float4) { make_float(0x3F7FFFFF), 0.0, 
make_float(0xBF5B67B2), make_float(0xBF400000) }; vec_int4 e29_v = (vec_int4) { 129, 0, -87, 129 }; // Compound - vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ; + vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -HUGE_VALF } ; vec_float4 r30_v = (vec_float4) { 0.0, make_float(0xBF0DABC6 ), make_float(0x3F60BD3C), make_float(0xBF7FFFFF) }; vec_int4 e30_v = (vec_int4) { 0, -62, -58, 129 }; diff --git a/Extras/simdmathlibrary/spu/tests/hypotd2.c b/Extras/simdmathlibrary/spu/tests/hypotd2.c index de1ab17d7..d66d1a495 100644 --- a/Extras/simdmathlibrary/spu/tests/hypotd2.c +++ b/Extras/simdmathlibrary/spu/tests/hypotd2.c @@ -54,7 +54,7 @@ int main() double r1 = hide_double( nan("")); //-Inf, -QNaN - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(make_double(0xFFFFFFFFFFFFFFFFull)); double r2 = hide_double( nan("")); @@ -70,13 +70,13 @@ int main() //-Norm, -Inf double x5 = hide_double(-168.97345223013); - double y5 = hide_double(-1.0/0.0); - double r5 = hide_double( 1.0/0.0); + double y5 = hide_double(-HUGE_VAL); + double r5 = hide_double(HUGE_VAL); //+Inf, -Inf - double x6 = hide_double( 1.0/0.0); - double y6 = hide_double(-1.0/0.0); - double r6 = hide_double( 1.0/0.0); + double x6 = hide_double(HUGE_VAL); + double y6 = hide_double(-HUGE_VAL); + double r6 = hide_double(HUGE_VAL); //-Norm, -0 double x7 = hide_double(-168.97345223013); @@ -159,9 +159,9 @@ int main() double r22 = hide_double(468729.8610289); //+Inf, +Ovf - double x23 = hide_double( 1.0/0.0); - double y23 = hide_double( 1.0e999); - double r23 = hide_double( 1.0/0.0); + double x23 = hide_double(HUGE_VAL); + double y23 = hide_double(1.0e999); + double r23 = hide_double(HUGE_VAL); //+Norm, +QNaN double x24 = hide_double(264.345643345); @@ -169,7 +169,7 @@ int main() double r24 = hide_double( nan("")); //+Inf, +QNaN - double x25 = hide_double( 1.0/0.0); + double x25 = hide_double(HUGE_VAL); double y25 = hide_double(nan("")); double r25 = hide_double(nan("")); diff --git a/Extras/simdmathlibrary/spu/tests/ilogbd2.c b/Extras/simdmathlibrary/spu/tests/ilogbd2.c index 1bc449d09..38cd9814b 100644 --- a/Extras/simdmathlibrary/spu/tests/ilogbd2.c +++ b/Extras/simdmathlibrary/spu/tests/ilogbd2.c @@ -30,19 +30,13 @@ #include #include +#include #include #include #include "simdmath.h" #include "common-test.h" #include "testutils.h" -#ifndef FP_ILOGB0 -#define FP_ILOGB0 ((int)0x80000001) -#endif -#ifndef FP_ILOGBNAN -#define FP_ILOGBNAN ((int)0x7FFFFFFF) -#endif - int main() { TEST_SET_START("20060904000000AAN","AAN", "ilogbd2"); @@ -52,7 +46,7 @@ int main() long long r0 = (long long)FP_ILOGBNAN; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); long long r1 = (long long)FP_ILOGB0; // -Dmax @@ -144,7 +138,7 @@ int main() long long r23 = 1023ll; // +Inf - double x24 = hide_double( 1.0/0.0); + double x24 = hide_double(HUGE_VAL); long long r24 = (long long)FP_ILOGB0; //+Nan @@ -164,11 +158,11 @@ int main() vec_llong2 r28_v = (vec_llong2) { FP_ILOGBNAN, 21ll }; // Compound - vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x29_v = (vec_double2) { HUGE_VAL, -nan("") }; vec_llong2 r29_v = (vec_llong2) { FP_ILOGB0, FP_ILOGBNAN }; // Compound - vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ; + vec_double2 x30_v = (vec_double2) { -1.2e-99, -HUGE_VAL } ; vec_llong2 r30_v = (vec_llong2) { -329ll, FP_ILOGB0 }; vec_double2 x0_v = spu_splats(x0); diff --git 
a/Extras/simdmathlibrary/spu/tests/ilogbf4.c b/Extras/simdmathlibrary/spu/tests/ilogbf4.c index e76928ea5..cb804aaa3 100644 --- a/Extras/simdmathlibrary/spu/tests/ilogbf4.c +++ b/Extras/simdmathlibrary/spu/tests/ilogbf4.c @@ -31,16 +31,13 @@ #include #include +#include #include #include #include "simdmath.h" #include "common-test.h" #include "testutils.h" -#ifndef FP_ILOGB0 -#define FP_ILOGB0 ((int)0x80000001) -#endif - int main() { TEST_SET_START("20060904000000AAN","AAN", "ilogbf4"); @@ -50,7 +47,7 @@ int main() int r0 = 128; // -Norm (IEEE-754: -Inf) - float x1 = hide_float(-1.0/0.0); + float x1 = hide_float(-HUGE_VALF); int r1 = 128; // -Smax @@ -142,7 +139,7 @@ int main() int r23 = 128; //+Norm (IEEE-754: +Inf) - float x24 = hide_float( 1.0/0.0); + float x24 = hide_float( HUGE_VALF); int r24 = 128; //+Norm (IEEE-754: +Nan) @@ -162,11 +159,11 @@ int main() vec_int4 r28_v = (vec_int4) { 128, 19, FP_ILOGB0, 21 }; // Compound - vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; + vec_float4 x29_v = (vec_float4) { HUGE_VALF, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) }; vec_int4 r29_v = (vec_int4) { 128, FP_ILOGB0, -88, 128 }; // Compound - vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ; + vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -HUGE_VALF } ; vec_int4 r30_v = (vec_int4) { FP_ILOGB0, -63, -59, 128 }; vec_float4 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/is0denormd2.c b/Extras/simdmathlibrary/spu/tests/is0denormd2.c index 161bce6ec..9433749b8 100644 --- a/Extras/simdmathlibrary/spu/tests/is0denormd2.c +++ b/Extras/simdmathlibrary/spu/tests/is0denormd2.c @@ -45,7 +45,7 @@ int main() unsigned long long r0 = 0x0000000000000000ull; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; // -Dmax @@ -97,7 +97,7 @@ int main() unsigned long long r13 = 0x0000000000000000ull; // +Inf - double x14 = hide_double( 1.0/0.0); + double x14 = hide_double( HUGE_VAL); unsigned long long r14 = 0x0000000000000000ull; //+Nan @@ -117,11 +117,11 @@ int main() vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; // Compound - vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x19_v = (vec_double2) { HUGE_VAL, -nan("") }; vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; // Compound - vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ; + vec_double2 x20_v = (vec_double2) { -1.0e-999, -HUGE_VAL} ; vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; vec_double2 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/is0denormf4.c b/Extras/simdmathlibrary/spu/tests/is0denormf4.c index ddf98b12d..dcb91d608 100644 --- a/Extras/simdmathlibrary/spu/tests/is0denormf4.c +++ b/Extras/simdmathlibrary/spu/tests/is0denormf4.c @@ -40,11 +40,11 @@ int main() TEST_SET_START("20060830000000AAN","AAN", "is0denormf4"); // -Nan - float x0 = hide_float(-nan("")); + float x0 = hide_float(-NANF); unsigned int r0 = 0x00000000; // -Inf - float x1 = hide_float(-1.0/0.0); + float x1 = hide_float(-HUGE_VALF); unsigned int r1 = 0x00000000; // -Smax @@ -96,11 +96,11 @@ int main() unsigned int r13 = 0x00000000; // +Inf - float x14 = hide_float( 1.0/0.0); + float x14 = hide_float( HUGE_VALF); unsigned int r14 = 0x00000000; //+Nan - float x15 = hide_float( nan("")); + float x15 = hide_float(NANF); unsigned int r15 
= 0x00000000; // Compound diff --git a/Extras/simdmathlibrary/spu/tests/isequald2.c b/Extras/simdmathlibrary/spu/tests/isequald2.c index 3980e6955..5e80be8c2 100644 --- a/Extras/simdmathlibrary/spu/tests/isequald2.c +++ b/Extras/simdmathlibrary/spu/tests/isequald2.c @@ -46,18 +46,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0x0000000000000000ull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0x0000000000000000ull; //-Norm < -Denorm @@ -131,7 +131,7 @@ int main() unsigned long long r17 = 0x0000000000000000ull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0x0000000000000000ull; diff --git a/Extras/simdmathlibrary/spu/tests/isequalf4.c b/Extras/simdmathlibrary/spu/tests/isequalf4.c index 702687813..52c5ad508 100644 --- a/Extras/simdmathlibrary/spu/tests/isequalf4.c +++ b/Extras/simdmathlibrary/spu/tests/isequalf4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0xffffffff; - float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0x00000000; float x2 = hide_float(-0.0000000013152f); @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0x00000000; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0x00000000; diff --git a/Extras/simdmathlibrary/spu/tests/isfinited2.c b/Extras/simdmathlibrary/spu/tests/isfinited2.c index 7d045535a..798195505 100644 --- a/Extras/simdmathlibrary/spu/tests/isfinited2.c +++ b/Extras/simdmathlibrary/spu/tests/isfinited2.c @@ -45,7 +45,7 @@ int main() unsigned long long r0 = 0x0000000000000000ull; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; // -Dmax @@ -97,7 +97,7 @@ int main() unsigned long long r13 = 0xffffffffffffffffull; // +Inf - double x14 = hide_double( 1.0/0.0); + double x14 = hide_double(HUGE_VAL); unsigned long long r14 = 0x0000000000000000ull; //+Nan @@ -117,11 +117,11 @@ int main() vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; // Compound - vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x19_v = (vec_double2) { HUGE_VAL, -nan("") }; vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull}; // Compound - vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ; + vec_double2 x20_v = (vec_double2) { -1.0e-999, -HUGE_VAL} ; vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; vec_double2 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/isfinitef4.c b/Extras/simdmathlibrary/spu/tests/isfinitef4.c index 4d2e39f8f..aff25a2f0 100644 --- 
a/Extras/simdmathlibrary/spu/tests/isfinitef4.c +++ b/Extras/simdmathlibrary/spu/tests/isfinitef4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -42,7 +43,7 @@ int main() float x0 = hide_float(-0.0f); unsigned int r0 = 0xffffffff; - float x1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0xffffffff; float x2 = hide_float(-0.0000000013152f); @@ -66,7 +67,7 @@ int main() float x8 = hide_float(2353705.31415f); unsigned int r8 = 0xffffffff; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax unsigned int r9 = 0xffffffff; vec_float4 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterd2.c b/Extras/simdmathlibrary/spu/tests/isgreaterd2.c index 420ba53c6..5c4ff593e 100644 --- a/Extras/simdmathlibrary/spu/tests/isgreaterd2.c +++ b/Extras/simdmathlibrary/spu/tests/isgreaterd2.c @@ -46,18 +46,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0xffffffffffffffffull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0x0000000000000000ull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0xffffffffffffffffull; //-Norm < -Denorm @@ -131,7 +131,7 @@ int main() unsigned long long r17 = 0x0000000000000000ull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0xffffffffffffffffull; diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c b/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c index f83e3db0c..00bf97ae0 100644 --- a/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c +++ b/Extras/simdmathlibrary/spu/tests/isgreaterequald2.c @@ -46,18 +46,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0xffffffffffffffffull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0x0000000000000000ull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0xffffffffffffffffull; //-Norm < -Denorm @@ -131,7 +131,7 @@ int main() unsigned long long r17 = 0x0000000000000000ull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0xffffffffffffffffull; diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c b/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c index 197822590..ac13a26e1 100644 --- a/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c +++ b/Extras/simdmathlibrary/spu/tests/isgreaterequalf4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0xffffffff; - 
float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0xffffffff; float x2 = hide_float(-0.0000000013152f); @@ -56,14 +57,14 @@ int main() unsigned int r3 = 0xffffffff; float x4 = hide_float(-83532.96153153f); - float y4 = hide_float(-1e-999); //-Smin + float y4 = hide_float(-FLT_MIN); //-Smin unsigned int r4 = 0x00000000; float x5 = hide_float(-321.01234567f); float y5 = hide_float(876543.12345f); unsigned int r5 = 0x00000000; - float x6 = hide_float( 1e-999); // Smin + float x6 = hide_float(FLT_MIN); // Smin float y6 = hide_float(0.0031529324f); unsigned int r6 = 0x00000000; @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0x00000000; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0xffffffff; diff --git a/Extras/simdmathlibrary/spu/tests/isgreaterf4.c b/Extras/simdmathlibrary/spu/tests/isgreaterf4.c index 109ae143f..8390f8ce6 100644 --- a/Extras/simdmathlibrary/spu/tests/isgreaterf4.c +++ b/Extras/simdmathlibrary/spu/tests/isgreaterf4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0x00000000; - float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0xffffffff; float x2 = hide_float(-0.0000000013152f); @@ -56,14 +57,14 @@ int main() unsigned int r3 = 0x00000000; float x4 = hide_float(-83532.96153153f); - float y4 = hide_float(-1e-999); //-Smin + float y4 = hide_float(-FLT_MIN); //-Smin unsigned int r4 = 0x00000000; float x5 = hide_float(-321.01234567f); float y5 = hide_float(876543.12345f); unsigned int r5 = 0x00000000; - float x6 = hide_float( 1e-999); // Smin + float x6 = hide_float(FLT_MIN); // Smin float y6 = hide_float(0.0031529324f); unsigned int r6 = 0x00000000; @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0x00000000; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0xffffffff; diff --git a/Extras/simdmathlibrary/spu/tests/isinfd2.c b/Extras/simdmathlibrary/spu/tests/isinfd2.c index c31c3baf5..94b49a3b6 100644 --- a/Extras/simdmathlibrary/spu/tests/isinfd2.c +++ b/Extras/simdmathlibrary/spu/tests/isinfd2.c @@ -45,7 +45,7 @@ int main() unsigned long long r0 = 0x0000000000000000ull; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0xffffffffffffffffull; // -Dmax @@ -97,7 +97,7 @@ int main() unsigned long long r13 = 0x0000000000000000ull; // +Inf - double x14 = hide_double( 1.0/0.0); + double x14 = hide_double(HUGE_VAL); unsigned long long r14 = 0xffffffffffffffffull; //+Nan @@ -117,11 +117,11 @@ int main() vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; // Compound - vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x19_v = (vec_double2) { HUGE_VAL, -nan("") }; vec_ullong2 r19_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; // Compound - vec_double2 x20_v = (vec_double2) { -1.0e999, -1.0/0.0} ; + vec_double2 x20_v = (vec_double2) { -1.0e999, -HUGE_VAL} ; vec_ullong2 r20_v = 
(vec_ullong2) {0xffffffffffffffffull, 0xffffffffffffffffull}; vec_double2 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/isinff4.c b/Extras/simdmathlibrary/spu/tests/isinff4.c index 2e3888d89..f2bf174c2 100644 --- a/Extras/simdmathlibrary/spu/tests/isinff4.c +++ b/Extras/simdmathlibrary/spu/tests/isinff4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -42,7 +43,7 @@ int main() float x0 = hide_float(-0.0f); unsigned int r0 = 0x00000000; - float x1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0x00000000; float x2 = hide_float(-0.0000000013152f); @@ -66,7 +67,7 @@ int main() float x8 = hide_float(2353705.31415f); unsigned int r8 = 0x00000000; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax unsigned int r9 = 0x00000000; vec_float4 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/islessd2.c b/Extras/simdmathlibrary/spu/tests/islessd2.c index 51e800816..087df293a 100644 --- a/Extras/simdmathlibrary/spu/tests/islessd2.c +++ b/Extras/simdmathlibrary/spu/tests/islessd2.c @@ -47,18 +47,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0xffffffffffffffffull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0x0000000000000000ull; //-Norm < -Denorm @@ -132,7 +132,7 @@ int main() unsigned long long r17 = 0xffffffffffffffffull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0x0000000000000000ull; diff --git a/Extras/simdmathlibrary/spu/tests/islessequald2.c b/Extras/simdmathlibrary/spu/tests/islessequald2.c index d9d958b76..52c9ea4cb 100644 --- a/Extras/simdmathlibrary/spu/tests/islessequald2.c +++ b/Extras/simdmathlibrary/spu/tests/islessequald2.c @@ -46,18 +46,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0xffffffffffffffffull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0x0000000000000000ull; //-Norm < -Denorm @@ -131,7 +131,7 @@ int main() unsigned long long r17 = 0xffffffffffffffffull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0x0000000000000000ull; diff --git a/Extras/simdmathlibrary/spu/tests/islessequalf4.c b/Extras/simdmathlibrary/spu/tests/islessequalf4.c index 0af521028..0960e1c15 100644 --- a/Extras/simdmathlibrary/spu/tests/islessequalf4.c +++ b/Extras/simdmathlibrary/spu/tests/islessequalf4.c @@ -31,6 +31,7 @@ #include 
#include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0xffffffff; - float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0x00000000; float x2 = hide_float(-0.0000000013152f); @@ -56,14 +57,14 @@ int main() unsigned int r3 = 0xffffffff; float x4 = hide_float(-83532.96153153f); - float y4 = hide_float(-1e-999); //-Smin + float y4 = hide_float(-FLT_MIN); //-Smin unsigned int r4 = 0xffffffff; float x5 = hide_float(-321.01234567f); float y5 = hide_float(876543.12345f); unsigned int r5 = 0xffffffff; - float x6 = hide_float( 1e-999); // Smin + float x6 = hide_float(FLT_MIN); // Smin float y6 = hide_float(0.0031529324f); unsigned int r6 = 0xffffffff; @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0xffffffff; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0x00000000; diff --git a/Extras/simdmathlibrary/spu/tests/islessf4.c b/Extras/simdmathlibrary/spu/tests/islessf4.c index 26fcaa22a..593cea616 100644 --- a/Extras/simdmathlibrary/spu/tests/islessf4.c +++ b/Extras/simdmathlibrary/spu/tests/islessf4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0x00000000; - float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0x00000000; float x2 = hide_float(-0.0000000013152f); @@ -56,14 +57,14 @@ int main() unsigned int r3 = 0x00000000; float x4 = hide_float(-83532.96153153f); - float y4 = hide_float(-1e-999); //-Smin + float y4 = hide_float(-FLT_MIN); //-Smin unsigned int r4 = 0xffffffff; float x5 = hide_float(-321.01234567f); float y5 = hide_float(876543.12345f); unsigned int r5 = 0xffffffff; - float x6 = hide_float( 1e-999); // Smin + float x6 = hide_float(FLT_MIN); // Smin float y6 = hide_float(0.0031529324f); unsigned int r6 = 0xffffffff; @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0xffffffff; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0x00000000; diff --git a/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c b/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c index 5d93d2d0b..e16ec86b0 100644 --- a/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c +++ b/Extras/simdmathlibrary/spu/tests/islessgreaterd2.c @@ -46,18 +46,18 @@ int main() unsigned long long r0 = 0x0000000000000000ull; //+Inf > -Inf - double x1 = hide_double( 1.0/0.0); - double y1 = hide_double(-1.0/0.0); + double x1 = hide_double( HUGE_VAL); + double y1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0xffffffffffffffffull; //-Inf < -Dmax - double x2 = hide_double(-1.0/0.0); + double x2 = hide_double(-HUGE_VAL); double y2 = hide_double(-DBL_MAX); unsigned long long r2 = 0xffffffffffffffffull; //-Norm > -Inf double x3 = hide_double(-67418234.34256245); - double y3 = hide_double(-1.0/0.0); + double y3 = hide_double(-HUGE_VAL); unsigned long long r3 = 0xffffffffffffffffull; //-Norm < -Denorm @@ -131,7 +131,7 @@ int main() unsigned long 
long r17 = 0xffffffffffffffffull; //+Inf > +Dmax - double x18 = hide_double( 1.0/0.0); + double x18 = hide_double(HUGE_VAL); double y18 = hide_double(DBL_MAX); unsigned long long r18 = 0xffffffffffffffffull; diff --git a/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c b/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c index 51cf06672..ae8e1231e 100644 --- a/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c +++ b/Extras/simdmathlibrary/spu/tests/islessgreaterf4.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -43,8 +44,8 @@ int main() float y0 = hide_float( 0.0f); unsigned int r0 = 0x00000000; - float x1 = hide_float( 1.0/0.0); //+Smax - float y1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float( FLT_MAX); //+Smax + float y1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0xffffffff; float x2 = hide_float(-0.0000000013152f); @@ -75,7 +76,7 @@ int main() float y8 = hide_float(2353705.31415f); unsigned int r8 = 0xffffffff; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax float y9 = hide_float(9.43574552184f); unsigned int r9 = 0xffffffff; diff --git a/Extras/simdmathlibrary/spu/tests/isnand2.c b/Extras/simdmathlibrary/spu/tests/isnand2.c index c01b8eef3..9d42a7da7 100644 --- a/Extras/simdmathlibrary/spu/tests/isnand2.c +++ b/Extras/simdmathlibrary/spu/tests/isnand2.c @@ -46,7 +46,7 @@ int main() unsigned long long r0 = 0xffffffffffffffffull; // -Inf - double x1 = hide_double(-1.0/0.0); + double x1 = hide_double(-HUGE_VAL); unsigned long long r1 = 0x0000000000000000ull; // -Dmax @@ -98,7 +98,7 @@ int main() unsigned long long r13 = 0xffffffffffffffffull; // +Inf - double x14 = hide_double( 1.0/0.0); + double x14 = hide_double(HUGE_VAL); unsigned long long r14 = 0x0000000000000000ull; //+Nan @@ -118,11 +118,11 @@ int main() vec_ullong2 r18_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; // Compound - vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") }; + vec_double2 x19_v = (vec_double2) { HUGE_VAL, -nan("") }; vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull}; // Compound - vec_double2 x20_v = (vec_double2) { make_double(0x7FF8000000000000ull), -1.0/0.0} ; + vec_double2 x20_v = (vec_double2) { make_double(0x7FF8000000000000ull), -HUGE_VAL} ; vec_ullong2 r20_v = (vec_ullong2) {0xffffffffffffffffull, 0x0000000000000000ull}; vec_double2 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/isnanf4.c b/Extras/simdmathlibrary/spu/tests/isnanf4.c index 0bc6bb30e..c0c5572a5 100644 --- a/Extras/simdmathlibrary/spu/tests/isnanf4.c +++ b/Extras/simdmathlibrary/spu/tests/isnanf4.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "simdmath.h" #include "common-test.h" #include "testutils.h" @@ -41,7 +42,7 @@ int main() float x0 = hide_float(-0.0f); unsigned int r0 = 0x00000000; - float x1 = hide_float(-1.0/0.0); //-Smax + float x1 = hide_float(-FLT_MAX); //-Smax unsigned int r1 = 0x00000000; float x2 = hide_float(-0.0000000013152f); @@ -65,7 +66,7 @@ int main() float x8 = hide_float(2353705.31415f); unsigned int r8 = 0x00000000; - float x9 = hide_float( 1.0/0.0); // Smax + float x9 = hide_float(FLT_MAX); // Smax unsigned int r9 = 0x00000000; vec_float4 x0_v = spu_splats(x0); diff --git a/Extras/simdmathlibrary/spu/tests/isnormald2.c b/Extras/simdmathlibrary/spu/tests/isnormald2.c index d2b2d290e..7200dd5c9 100644 --- a/Extras/simdmathlibrary/spu/tests/isnormald2.c +++ 
diff --git a/Extras/simdmathlibrary/spu/tests/isnormald2.c b/Extras/simdmathlibrary/spu/tests/isnormald2.c
index d2b2d290e..7200dd5c9 100644
--- a/Extras/simdmathlibrary/spu/tests/isnormald2.c
+++ b/Extras/simdmathlibrary/spu/tests/isnormald2.c
@@ -45,7 +45,7 @@ int main()
 unsigned long long r0 = 0x0000000000000000ull;
 // -Inf
- double x1 = hide_double(-1.0/0.0);
+ double x1 = hide_double(-HUGE_VAL);
 unsigned long long r1 = 0x0000000000000000ull;
 // -Dmax
@@ -97,7 +97,7 @@ int main()
 unsigned long long r13 = 0xffffffffffffffffull;
 // +Inf
- double x14 = hide_double( 1.0/0.0);
+ double x14 = hide_double(HUGE_VAL);
 unsigned long long r14 = 0x0000000000000000ull;
 //+Nan
@@ -117,11 +117,11 @@ int main()
 vec_ullong2 r18_v = (vec_ullong2) {0x0000000000000000ull, 0xffffffffffffffffull};
 // Compound
- vec_double2 x19_v = (vec_double2) { 1.0/0.0, -nan("") };
+ vec_double2 x19_v = (vec_double2) { HUGE_VAL, -nan("") };
 vec_ullong2 r19_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull};
 // Compound
- vec_double2 x20_v = (vec_double2) { -1.0e-999, -1.0/0.0} ;
+ vec_double2 x20_v = (vec_double2) { -1.0e-999, -HUGE_VAL};
 vec_ullong2 r20_v = (vec_ullong2) {0x0000000000000000ull, 0x0000000000000000ull};
 vec_double2 x0_v = spu_splats(x0);
diff --git a/Extras/simdmathlibrary/spu/tests/isnormalf4.c b/Extras/simdmathlibrary/spu/tests/isnormalf4.c
index 529c53c0a..30cde0933 100644
--- a/Extras/simdmathlibrary/spu/tests/isnormalf4.c
+++ b/Extras/simdmathlibrary/spu/tests/isnormalf4.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <float.h>
 #include "simdmath.h"
 #include "common-test.h"
 #include "testutils.h"
@@ -42,7 +43,7 @@ int main()
 float x0 = hide_float(-0.0f);
 unsigned int r0 = 0x00000000;
- float x1 = hide_float(-1.0/0.0); //-Smax
+ float x1 = hide_float(-FLT_MAX); //-Smax
 unsigned int r1 = 0xffffffff;
 float x2 = hide_float( 0.0f);
@@ -57,7 +58,7 @@ int main()
 float x5 = hide_float(876543.12345f);
 unsigned int r5 = 0xffffffff;
- float x6 = hide_float( 1e-999); // Smin
+ float x6 = hide_float(1e-999); // Smin
 unsigned int r6 = 0x00000000;
 float x7 = hide_float(5172.2845321f);
@@ -66,7 +67,7 @@ int main()
 float x8 = hide_float(2353705.31415f);
 unsigned int r8 = 0xffffffff;
- float x9 = hide_float( 1.0/0.0); // Smax
+ float x9 = hide_float(FLT_MAX); // Smax
 unsigned int r9 = 0xffffffff;
 vec_float4 x0_v = spu_splats(x0);
diff --git a/Extras/simdmathlibrary/spu/tests/isunorderedd2.c b/Extras/simdmathlibrary/spu/tests/isunorderedd2.c
index 935947e9d..4f562b35c 100644
--- a/Extras/simdmathlibrary/spu/tests/isunorderedd2.c
+++ b/Extras/simdmathlibrary/spu/tests/isunorderedd2.c
@@ -46,12 +46,12 @@ int main()
 unsigned long long r0 = 0xffffffffffffffffull;
 //+Inf, -Inf
- double x1 = hide_double( 1.0/0.0);
- double y1 = hide_double(-1.0/0.0);
+ double x1 = hide_double( HUGE_VAL);
+ double y1 = hide_double(-HUGE_VAL);
 unsigned long long r1 = 0x0000000000000000ull;
 //-Inf, -QNaN
- double x2 = hide_double(-1.0/0.0);
+ double x2 = hide_double(-HUGE_VAL);
 double y2 = hide_double(make_double(0xFFFFFFFFFFFFFFFFull));
 unsigned long long r2 = 0xffffffffffffffffull;
@@ -67,7 +67,7 @@ int main()
 //-Norm, -Inf
 double x5 = hide_double(-168.97345223013);
- double y5 = hide_double(-1.0/0.0);
+ double y5 = hide_double(-HUGE_VAL);
 unsigned long long r5 = 0x0000000000000000ull;
 //-QNaN, -Norm
@@ -131,12 +131,12 @@ int main()
 unsigned long long r17 = 0x0000000000000000ull;
 //+Inf, +Ovf
- double x18 = hide_double( 1.0/0.0);
+ double x18 = hide_double( HUGE_VAL);
 double y18 = hide_double( 1.0e999);
 unsigned long long r18 = 0x0000000000000000ull;
 //+Inf, +QNaN
- double x19 = hide_double( 1.0/0.0);
+ double x19 = hide_double( HUGE_VAL);
 double y19 = hide_double(nan(""));
 unsigned long long r19 = 0xffffffffffffffffull;
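The isunorderedd2 expectations above follow IEEE-754 directly: a comparison is unordered exactly when at least one operand is a NaN, while infinities compare like any other value. A scalar C99 illustration of the same truth table (illustrative only; the tests check it lane by lane with SPU vectors):

/* isunordered_demo.c: scalar version of the expectations encoded above. */
#include <assert.h>
#include <math.h>

int main(void)
{
    assert(!isunordered( HUGE_VAL, -HUGE_VAL));        /* +Inf vs -Inf: ordered   */
    assert( isunordered(-HUGE_VAL,  nan("")));         /* anything vs NaN: unordered */
    assert(!isunordered(-168.97345223013, -HUGE_VAL)); /* -Norm vs -Inf: ordered  */
    assert( isunordered( HUGE_VAL,  nan("")));         /* +Inf vs NaN: unordered  */
    return 0;
}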
diff --git a/Extras/simdmathlibrary/spu/tests/isunorderedf4.c b/Extras/simdmathlibrary/spu/tests/isunorderedf4.c
index 4ba85a6a5..d26a6c8d9 100644
--- a/Extras/simdmathlibrary/spu/tests/isunorderedf4.c
+++ b/Extras/simdmathlibrary/spu/tests/isunorderedf4.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <float.h>
 #include "simdmath.h"
 #include "common-test.h"
 #include "testutils.h"
@@ -43,8 +44,8 @@ int main()
 float y0 = hide_float( 0.0f);
 unsigned int r0 = 0x00000000;
- float x1 = hide_float( 1.0/0.0); //+Smax
- float y1 = hide_float(-1.0/0.0); //-Smax
+ float x1 = hide_float( FLT_MAX); //+Smax
+ float y1 = hide_float(-FLT_MAX); //-Smax
 unsigned int r1 = 0x00000000;
 float x2 = hide_float(-0.0000000013152f);
@@ -75,7 +76,7 @@ int main()
 float y8 = hide_float(2353705.31415f);
 unsigned int r8 = 0x00000000;
- float x9 = hide_float( 1.0/0.0); // Smax
+ float x9 = hide_float(FLT_MAX); // Smax
 float y9 = hide_float(9.43574552184f);
 unsigned int r9 = 0x00000000;
diff --git a/Extras/simdmathlibrary/spu/tests/llrintd2.c b/Extras/simdmathlibrary/spu/tests/llrintd2.c
index 277688d5f..449bb4cb3 100644
--- a/Extras/simdmathlibrary/spu/tests/llrintd2.c
+++ b/Extras/simdmathlibrary/spu/tests/llrintd2.c
@@ -66,11 +66,11 @@ int main()
 // unsigned long long i11 = 0x7FFFFFFFFFFFFDFFull; //limit
 // unsigned long long i12 = 0xFFFFFFFFFFFFFDFFull; //limit
-// double x0 = hide_double(-1.0/0.0); // -Inf
-// double x1 = hide_double(1.0/0.0); // Inf
+// double x0 = hide_double(-HUGE_VAL); // -Inf
+// double x1 = hide_double(HUGE_VAL); // Inf
 double x2 = hide_double(0.0); // +0
 double x3 = hide_double(-0.0); // -0
-// double x4 = hide_double(0.0/0.0); // NaN -> NaN
+// double x4 = hide_double(nan("")); // NaN -> NaN
 double x5 = hide_double( 0.5);
 double x6 = hide_double(-0.5);
 double x7 = hide_double( 0.4999999999999999); // 0
diff --git a/Extras/simdmathlibrary/spu/tests/llrintf4.c b/Extras/simdmathlibrary/spu/tests/llrintf4.c
index e165a6524..75b6777b6 100644
--- a/Extras/simdmathlibrary/spu/tests/llrintf4.c
+++ b/Extras/simdmathlibrary/spu/tests/llrintf4.c
@@ -47,7 +47,6 @@
 #include
 #include
 #include
-//#include
 #include
 #include "simdmath.h"
 #include "common-test.h"
@@ -71,11 +70,11 @@ int main()
 // unsigned long i11 = 0x49fffffful; //2097151.875000
 // unsigned long i12 = 0x4a7ffffful; //4194303.750000
-// float x0 = hide_float(-1.0/0.0); // -Inf
-// float x1 = hide_float(1.0/0.0); // Inf
+// float x0 = hide_float(-FLT_MAX); // -Inf
+// float x1 = hide_float(FLT_MAX); // Inf
 float x2 = hide_float(0.0); // +0
 float x3 = hide_float(-0.0); // -0
-// float x4 = hide_float(0.0/0.0); // NaN -> NaN
+// float x4 = hide_float(NANF); // NaN -> NaN
 float x5 = hide_float( 0.5);
 float x6 = hide_float(-0.5);
 float x7 = hide_float(-0.499999);
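The llrint test values above (0.5 and -0.5 expected to round to 0, 0.4999999999999999 to 0) encode round-to-nearest-ties-to-even, the rounding the function performs in the default mode. A scalar sketch, assuming the default FE_TONEAREST rounding mode is in effect:

/* llrint_demo.c: the ties-to-even behavior the test inputs encode. */
#include <assert.h>
#include <math.h>

int main(void)
{
    assert(llrint( 0.5) == 0);                /* tie rounds to even (0)   */
    assert(llrint(-0.5) == 0);                /* tie rounds to even (-0)  */
    assert(llrint( 1.5) == 2);                /* tie rounds to even (2)   */
    assert(llrint( 0.4999999999999999) == 0); /* just below the tie point */
    return 0;
}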
diff --git a/Extras/simdmathlibrary/spu/tests/llroundd2.c b/Extras/simdmathlibrary/spu/tests/llroundd2.c
index e982dc199..6111180bd 100644
--- a/Extras/simdmathlibrary/spu/tests/llroundd2.c
+++ b/Extras/simdmathlibrary/spu/tests/llroundd2.c
@@ -46,7 +46,6 @@
 #include
 #include
 #include
-//#include
 #include
 #include "simdmath.h"
 #include "common-test.h"
@@ -60,11 +59,11 @@ int main()
 // unsigned long long i11 = 0x7FFFFFFFFFFFFDFFull; //limit
 // unsigned long long i12 = 0xFFFFFFFFFFFFFDFFull; //limit
-// double x0 = hide_double(-1.0/0.0); // -Inf
-// double x1 = hide_double(1.0/0.0); // Inf
+// double x0 = hide_double(-HUGE_VAL); // -Inf
+// double x1 = hide_double(HUGE_VAL); // Inf
 double x2 = hide_double(0.0); // +0
 double x3 = hide_double(-0.0); // -0
-// double x4 = hide_double(0.0/0.0); // NaN -> NaN
+// double x4 = hide_double(nan("")); // NaN -> NaN
 double x5 = hide_double( 0.5);
 double x6 = hide_double(-0.5);
 double x7 = hide_double( 0.4999999999999999); // 0
diff --git a/Extras/simdmathlibrary/spu/tests/llroundf4.c b/Extras/simdmathlibrary/spu/tests/llroundf4.c
index 7b1cf6643..8025a2cc4 100644
--- a/Extras/simdmathlibrary/spu/tests/llroundf4.c
+++ b/Extras/simdmathlibrary/spu/tests/llroundf4.c
@@ -49,7 +49,6 @@
 #include
 #include
 #include
-//#include
 #include
 #include "simdmath.h"
 #include "common-test.h"
@@ -73,11 +72,11 @@ int main()
 // unsigned long i11 = 0x49fffffful; //2097151.875000
 // unsigned long i12 = 0x4a7ffffful; //4194303.750000
-// float x0 = hide_float(-1.0/0.0); // -Inf
-// float x1 = hide_float(1.0/0.0); // Inf
+// float x0 = hide_float(-FLT_MAX); // -Inf
+// float x1 = hide_float(FLT_MAX); // Inf
 float x2 = hide_float(0.0); // +0
 float x3 = hide_float(-0.0); // -0
-// float x4 = hide_float(0.0/0.0); // NaN -> NaN
+// float x4 = hide_float(NANF); // NaN -> NaN
 float x5 = hide_float( 0.5);
 float x6 = hide_float(-0.5);
 float x7 = hide_float(-0.499999);
diff --git a/Extras/simdmathlibrary/spu/tests/logbd2.c b/Extras/simdmathlibrary/spu/tests/logbd2.c
index fcb33f704..60dbd1cf7 100644
--- a/Extras/simdmathlibrary/spu/tests/logbd2.c
+++ b/Extras/simdmathlibrary/spu/tests/logbd2.c
@@ -35,18 +35,6 @@
 #include "common-test.h"
 #include "testutils.h"
-#ifndef HUGE_VALL
-#define HUGE_VALL __builtin_huge_vall ()
-#endif
-
-#ifndef DBL_INF
-#define DBL_INF ((long long)0x7FF0000000000000ull)
-#endif
-
-#ifndef DBL_NAN
-#define DBL_NAN ((long long)0x7FF8000000000000ull)
-#endif
-
 int main()
 {
 TEST_SET_START("20060905000000AAN","AAN", "logbd2");
@@ -56,8 +44,8 @@ int main()
 double r0 = hide_double( nan(""));
 // -Inf
- double x1 = hide_double(-1.0/0.0);
- double r1 = hide_double(make_double(DBL_INF));
+ double x1 = hide_double(-HUGE_VAL);
+ double r1 = hide_double(HUGE_VAL);
 // -Dmax
 double x2 = hide_double(-DBL_MAX);
@@ -77,19 +65,19 @@ int main()
 // -Unf
 double x6 = hide_double(-1.0e-999);
- double r6 = make_double(-((unsigned long long)HUGE_VALL));
+ double r6 = hide_double(-HUGE_VAL);
 // -0
 double x7 = hide_double(-0.0);
- double r7 = make_double(-((unsigned long long)HUGE_VALL));
+ double r7 = hide_double(-HUGE_VAL);
 // 0
 double x8 = hide_double( 0.0);
- double r8 = make_double(-((unsigned long long)HUGE_VALL));
+ double r8 = hide_double(-HUGE_VAL);
 // +Unf
 double x9 = hide_double( 1.0e-999);
- double r9 = make_double(-((unsigned long long)HUGE_VALL));
+ double r9 = hide_double(-HUGE_VAL);
 // +Denorm
 double x10 = hide_double( 2.40e-310);
@@ -148,8 +136,8 @@ int main()
 double r23 = 1023.0;
 // +Inf
- double x24 = hide_double( 1.0/0.0);
- double r24 = hide_double(make_double(DBL_INF));
+ double x24 = hide_double(HUGE_VAL);
+ double r24 = hide_double(HUGE_VAL);
 //+Nan
 double x25 = hide_double( nan(""));
@@ -157,7 +145,7 @@ int main()
 // Compound
 vec_double2 x26_v = (vec_double2) { -2.561286432e-317, -1.0e-999 };
- vec_double2 r26_v = (vec_double2) { -1052.0, make_double(-((unsigned long long)HUGE_VALL)) };
+ vec_double2 r26_v = (vec_double2) { -1052.0, hide_double(-HUGE_VAL) };
 // Compound
 vec_double2 x27_v = (vec_double2) { 345.27533, -8.673e-310 };
@@ -168,12 +156,12 @@ int main()
 vec_double2 r28_v = (vec_double2) { nan(""), 21.0 };
 // Compound
- vec_double2 x29_v = (vec_double2) { 1.0/0.0, -nan("") };
- vec_double2 r29_v = (vec_double2) { make_double(DBL_INF), nan("") };
+ vec_double2 x29_v = (vec_double2) { HUGE_VAL, -nan("") };
+ vec_double2 r29_v = (vec_double2) { HUGE_VAL, nan("") };
 // Compound
- vec_double2 x30_v = (vec_double2) { -1.2e-99, -1.0/0.0 } ;
- vec_double2 r30_v = (vec_double2) { -329.0, make_double(DBL_INF) };
+ vec_double2 x30_v = (vec_double2) { -1.2e-99, -HUGE_VAL } ;
+ vec_double2 r30_v = (vec_double2) { -329.0, HUGE_VAL };
 vec_double2 x0_v = spu_splats(x0);
 vec_double2 r0_v = spu_splats(r0);
diff --git a/Extras/simdmathlibrary/spu/tests/logbf4.c b/Extras/simdmathlibrary/spu/tests/logbf4.c
index 43b0ae873..476a940a5 100644
--- a/Extras/simdmathlibrary/spu/tests/logbf4.c
+++ b/Extras/simdmathlibrary/spu/tests/logbf4.c
@@ -37,10 +37,6 @@
 #include "common-test.h"
 #include "testutils.h"
-#ifndef HUGE_VALF
-#define HUGE_VALF __builtin_huge_valf ()
-#endif
-
 int main()
 {
 TEST_SET_START("20060905000000AAN","AAN", "logbf4");
@@ -50,7 +46,7 @@ int main()
 float r0 = 128.0f;
 // -Norm (IEEE-754: -Inf)
- float x1 = hide_float(-1.0/0.0);
+ float x1 = hide_float(-HUGE_VALF);
 float r1 = 128.0f;
 // -Smax
@@ -67,27 +63,27 @@ int main()
 // -Denorm
 float x5 = hide_float(make_float(0x807AAAAA));
- float r5 = (float)-HUGE_VALF;
+ float r5 = -HUGE_VALF;
 // -Unf
 float x6 = hide_float(-1.0e-999);
- float r6 = (float)-HUGE_VALF;
+ float r6 = -HUGE_VALF;
 // -0
 float x7 = hide_float(-0.0);
- float r7 = (float)-HUGE_VALF;
+ float r7 = -HUGE_VALF;
 // 0
 float x8 = hide_float( 0.0);
- float r8 = (float)-HUGE_VALF;
+ float r8 = -HUGE_VALF;
 // +Unf
 float x9 = hide_float( 1.0e-999);
- float r9 = (float)-HUGE_VALF;
+ float r9 = -HUGE_VALF;
 // +Denorm
 float x10 = hide_float(make_float(0x007AAAAA));
- float r10 = (float)-HUGE_VALF;
+ float r10 = -HUGE_VALF;
 // +Smin
 float x11 = hide_float(make_float(0x00800000));
@@ -142,7 +138,7 @@ int main()
 float r23 = 128.0f;
 //+Norm (IEEE-754: +Inf)
- float x24 = hide_float( 1.0/0.0);
+ float x24 = hide_float(HUGE_VALF);
 float r24 = 128.0f;
 //+Norm (IEEE-754: +Nan)
@@ -162,11 +158,11 @@ int main()
 vec_float4 r28_v = (vec_float4) { 128.0f, 19.0f, -HUGE_VALF, 21.0f };
 // Compound
- vec_float4 x29_v = (vec_float4) { 1.0/0.0, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) };
+ vec_float4 x29_v = (vec_float4) { HUGE_VALF, 1.0e-99, -5.53856231e-27, make_float(0xFFC00000) };
 vec_float4 r29_v = (vec_float4) { 128.0f, -HUGE_VALF, -88.0f, 128.0f };
 // Compound
- vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -1.0/0.0 } ;
+ vec_float4 x30_v = (vec_float4) { 1.2e-57, -1.2e-19, 3.045784e-18, -HUGE_VALF } ;
 vec_float4 r30_v = (vec_float4) { -HUGE_VALF, -63.0f, -59.0f, 128.0f };
 vec_float4 x0_v = spu_splats(x0);
diff --git a/Extras/simdmathlibrary/spu/tests/negated2.c b/Extras/simdmathlibrary/spu/tests/negated2.c
index 777db15af..071a90b02 100644
--- a/Extras/simdmathlibrary/spu/tests/negated2.c
+++ b/Extras/simdmathlibrary/spu/tests/negated2.c
@@ -45,8 +45,8 @@ int main()
 double x1p = hide_double(83532.96153153);
 double x2n = hide_double(-0.0000000013152);
 double x2p = hide_double(0.0000000013152);
- double x3n = hide_double(-1.0/0.0);
- double x3p = hide_double(1.0/0.0);
+ double x3n = hide_double(-HUGE_VAL);
+ double x3p = hide_double(HUGE_VAL);
 vec_double2 x0n_v = spu_splats(x0n);
 vec_double2 x0p_v = spu_splats(x0p);
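The logbd2 expectations match C99 logb(): the unbiased exponent for finite nonzero inputs, -HUGE_VAL for zeros (and for inputs that underflow to zero in these tests), +infinity for infinities, and NaN for NaN. logbf4 differs because SPU single precision treats would-be infinities as ordinary large normals, hence r1 and r24 of 128.0f above. A scalar sketch of the double-precision cases, assuming a C99 libm:

/* logb_demo.c: the logb() edge cases the logbd2 expectations encode. */
#include <assert.h>
#include <float.h>
#include <math.h>

int main(void)
{
    assert(logb( HUGE_VAL) ==  HUGE_VAL);  /* logb(+Inf) = +Inf            */
    assert(logb(-HUGE_VAL) ==  HUGE_VAL);  /* logb(-Inf) = +Inf            */
    assert(logb( 0.0) == -HUGE_VAL);       /* logb(+-0) = -Inf             */
    assert(logb(-0.0) == -HUGE_VAL);
    assert(isnan(logb(nan(""))));          /* logb(NaN) = NaN              */
    assert(logb(DBL_MAX) == 1023.0);       /* unbiased exponent of +Dmax   */
    return 0;
}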
diff --git a/Extras/simdmathlibrary/spu/tests/recipd2.c b/Extras/simdmathlibrary/spu/tests/recipd2.c
index 6d14ed53d..573a69194 100644
--- a/Extras/simdmathlibrary/spu/tests/recipd2.c
+++ b/Extras/simdmathlibrary/spu/tests/recipd2.c
@@ -53,11 +53,11 @@ int main()
 unsigned long long i12 = 0x1ac4d062d451c99dull;
 unsigned long long i12r = 0x6518994c26ebbb3eull;
- double x0 = hide_double(-1.0/0.0); // -Inf
- double x1 = hide_double(1.0/0.0); // Inf
+ double x0 = hide_double(-HUGE_VAL); // -Inf
+ double x1 = hide_double(HUGE_VAL); // Inf
 double x2 = hide_double(0.0); // 0
 double x3 = hide_double(-0.0); // -0
- double x4 = hide_double(0.0/0.0); // NaN
+ double x4 = hide_double(nan("")); // NaN
 double x5 = hide_double(2.0);
 double x5r = hide_double(0.5);
 double x6 = hide_double(make_double(i6));
diff --git a/Extras/simdmathlibrary/spu/tests/remquof4.c b/Extras/simdmathlibrary/spu/tests/remquof4.c
index 065db3573..c350efd1a 100644
--- a/Extras/simdmathlibrary/spu/tests/remquof4.c
+++ b/Extras/simdmathlibrary/spu/tests/remquof4.c
@@ -116,8 +116,6 @@ int main()
 vec_float4 x5d_v = spu_splats(x5d);
 vec_float4 x5r_v = spu_splats(x5r);
- float res;
- int quo;
 vec_float4 res_v;
 vec_int4 quo_v;
@@ -140,26 +138,6 @@ int main()
 res_v = remquof4(x5n_v, x5d_v, &quo_v);
 TEST_CHECK("20060912170038NM", allequal_ulps_float4( res_v, x5r_v, 1 ), 0);
 TEST_CHECK("20060912170138NM", allequal_int4( quo_v, spu_splats((int)i5q) ), 0);
-
- TEST_START("remquof");
- res = remquof(x0n, x0d, &quo);
- TEST_CHECK("20060912170041NM", ulpDiff_f( res, x0r ) <= 1, 0);
- TEST_CHECK("20060912170141NM", (quo == (int)i0q), 0);
- res = remquof(x1n, x1d, &quo);
- TEST_CHECK("20060912170042NM", ulpDiff_f( res, x1r ) <= 1, 0);
- TEST_CHECK("20060912170142NM", (quo == (int)i1q), 0);
- res = remquof(x2n, x2d, &quo);
- TEST_CHECK("20060912170043NM", ulpDiff_f( res, x2r ) <= 1, 0);
- TEST_CHECK("20060912170143NM", (quo == (int)i2q), 0);
- res = remquof(x3n, x3d, &quo);
- TEST_CHECK("20060912170048NM", ulpDiff_f( res, x3r ) <= 1, 0);
- TEST_CHECK("20060912170144NM", (quo == (int)i3q), 0);
- res = remquof(x4n, x4d, &quo);
- TEST_CHECK("20060912170049NM", ulpDiff_f( res, x4r ) <= 1, 0);
- TEST_CHECK("20060912170149NM", (quo == (int)i4q), 0);
- res = remquof(x5n, x5d, &quo);
- TEST_CHECK("20060912170050NM", ulpDiff_f( res, x5r ) <= 1, 0);
- TEST_CHECK("20060912170150NM", (quo == (int)i5q), 0);
 
 TEST_SET_DONE();
diff --git a/Extras/simdmathlibrary/spu/tests/rsqrtd2.c b/Extras/simdmathlibrary/spu/tests/rsqrtd2.c
index 63de28d65..52247a304 100644
--- a/Extras/simdmathlibrary/spu/tests/rsqrtd2.c
+++ b/Extras/simdmathlibrary/spu/tests/rsqrtd2.c
@@ -52,11 +52,11 @@ int main()
 unsigned long long i11 = 0x1aabc083c5c26227ull;
 unsigned long long i11r = 0x52912e543817fabbull;
- double x0 = hide_double(-1.0/0.0); // -Inf -> NaN
- double x1 = hide_double(1.0/0.0); // Inf -> +0
+ double x0 = hide_double(-HUGE_VAL); // -Inf -> NaN
+ double x1 = hide_double(HUGE_VAL); // Inf -> +0
 double x2 = hide_double(0.0); // +0 -> Inf
 double x3 = hide_double(-0.0); // -0 -> -Inf
- double x4 = hide_double(0.0/0.0); // NaN -> NaN
+ double x4 = hide_double(nan("")); // NaN -> NaN
 double x5 = hide_double(4.0);
 double x5r = hide_double(0.5);
 double x6 = hide_double(make_double(i6));
diff --git a/Extras/simdmathlibrary/spu/tests/signbitd2.c b/Extras/simdmathlibrary/spu/tests/signbitd2.c
index e4c94cb9e..b81d0b98b 100644
--- a/Extras/simdmathlibrary/spu/tests/signbitd2.c
+++ b/Extras/simdmathlibrary/spu/tests/signbitd2.c
@@ -45,7 +45,7 @@ int main()
 unsigned long long r0 = 0xffffffffffffffffull;
 //-Inf
- double x1 = hide_double(-1.0/0.0);
+ double x1 = hide_double(-HUGE_VAL);
 unsigned long long r1 = 0xffffffffffffffffull;
 //-Smax
@@ -97,7 +97,7 @@ int main()
 unsigned long long r13 = 0x0000000000000000ull;
 //+Inf
- double x14 = hide_double( 1.0/0.0);
+ double x14 = hide_double(HUGE_VAL);
 unsigned long long r14 = 0x0000000000000000ull;
 //+NaN
diff --git a/Extras/simdmathlibrary/spu/tests/signbitf4.c b/Extras/simdmathlibrary/spu/tests/signbitf4.c
index d30ff7340..156cc2d6e 100644
--- a/Extras/simdmathlibrary/spu/tests/signbitf4.c
+++ b/Extras/simdmathlibrary/spu/tests/signbitf4.c
@@ -41,11 +41,11 @@ int main()
 TEST_SET_START("20060829000000AAN","AAN", "signbitf4");
 //-Nan
- float x0 = hide_float(-nan(""));
+ float x0 = hide_float(-NANF);
 unsigned int r0 = 0xffffffff;
 //-Inf
- float x1 = hide_float(-1.0/0.0);
+ float x1 = hide_float(-HUGE_VAL);
 unsigned int r1 = 0xffffffff;
 //-Smax
@@ -97,11 +97,11 @@ int main()
 unsigned int r13 = 0x00000000;
 //+Inf
- float x14 = hide_float( 1.0/0.0);
+ float x14 = hide_float(HUGE_VAL);
 unsigned int r14 = 0x00000000;
 //+NaN
- float x15 = hide_float( nan(""));
+ float x15 = hide_float(NANF);
 unsigned int r15 = 0x00000000;
 vec_float4 x0_v = spu_splats(x0);
diff --git a/Extras/simdmathlibrary/spu/tests/sqrtd2.c b/Extras/simdmathlibrary/spu/tests/sqrtd2.c
index d83753052..0863949fa 100644
--- a/Extras/simdmathlibrary/spu/tests/sqrtd2.c
+++ b/Extras/simdmathlibrary/spu/tests/sqrtd2.c
@@ -54,11 +54,11 @@ int main()
 unsigned long long i11 = 0x1aabc083c5c26227ull;
 unsigned long long i11r = 0x2d4dcce790f64a35ull;
- double x0 = hide_double(-1.0/0.0); // -Inf -> NaN
- double x1 = hide_double(1.0/0.0); // Inf -> Inf
+ double x0 = hide_double(-HUGE_VAL); // -Inf -> NaN
+ double x1 = hide_double(HUGE_VAL); // Inf -> Inf
 double x2 = hide_double(0.0); // +0 -> +0
 double x3 = hide_double(-0.0); // -0 -> -0
- double x4 = hide_double(0.0/0.0); // NaN -> NaN
+ double x4 = hide_double(nan("")); // NaN -> NaN
 double x5 = hide_double(4.0);
 double x5r = hide_double(2.0);
 double x6 = hide_double(make_double(i6));
diff --git a/Extras/simdmathlibrary/spu/tests/testutils.c b/Extras/simdmathlibrary/spu/tests/testutils.c
index 350110f03..58af0dfc9 100644
--- a/Extras/simdmathlibrary/spu/tests/testutils.c
+++ b/Extras/simdmathlibrary/spu/tests/testutils.c
@@ -105,7 +105,7 @@ vec_uint4 bitDiff_f4(vec_float4 ref, vec_float4 vals) {
 vec_int4 diff = spu_sub(refi, valsi);
 vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);
- return spu_sub((vec_uint4)spu_splats(32), spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0))));
+ return spu_sub((vec_uint4)spu_splats(32), spu_cntlz(spu_sel(negdiff, diff, spu_cmpgt(diff, 0))));
 }
 unsigned int bitDiff_f(float ref, float val) {
@@ -156,7 +156,7 @@ vec_uint4 ulpDiff_f4(vec_float4 ref, vec_float4 vals) {
 vec_int4 diff = spu_sub(refi, valsi);
 vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);
- return (vec_uint4)(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)));
+ return (vec_uint4)(spu_sel(negdiff, diff, spu_cmpgt(diff, 0)));
 }
 unsigned int ulpDiff_f(float ref, float val) {
diff --git a/Extras/simdmathlibrary/spu/tests/testutils.h b/Extras/simdmathlibrary/spu/tests/testutils.h
index f207bc460..b557b7601 100644
--- a/Extras/simdmathlibrary/spu/tests/testutils.h
+++ b/Extras/simdmathlibrary/spu/tests/testutils.h
@@ -32,7 +32,9 @@
 #ifndef _TESTUTILS_H_
-#include "floatingpoint_tests.h"
+#include
+
+#define NANF __builtin_nanf("")
 extern unsigned int hide_uint( unsigned int x );
 extern int hide_int( int x );
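For reference, the ulpDiff_f4 helper above measures ULP distance as the absolute difference between the two floats' raw bit patterns, and bitDiff_f4 turns that into a bit count via 32 minus the count of leading zeros; the hunks simply drop a spurious (vec_uchar16) cast, since spu_sel takes the comparison mask directly. A hedged scalar equivalent of the per-lane ULP computation (plain C, not the SPU intrinsics; the function name ulp_diff is mine):

/* ulp_demo.c: scalar sketch of ulpDiff_f4's per-lane computation,
 * interpreting both floats as 32-bit integers and taking |refi - vali|,
 * exactly as the spu_sub/spu_sel expression does per lane. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned int ulp_diff(float ref, float val)
{
    int32_t refi, vali;
    memcpy(&refi, &ref, sizeof refi);  /* reinterpret the bit patterns */
    memcpy(&vali, &val, sizeof vali);
    int32_t diff = refi - vali;
    return (unsigned int)(diff > 0 ? diff : -diff);  /* |refi - vali| */
}

int main(void)
{
    printf("%u\n", ulp_diff(1.0f, 1.0000001f));  /* adjacent floats -> 1 */
    return 0;
}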