added updated version of simdmathlibrary-1.0.1

This commit is contained in:
ejcoumans
2007-07-27 18:53:58 +00:00
parent fddd6c5721
commit f360dd27d6
377 changed files with 9928 additions and 6136 deletions

View File

@@ -30,11 +30,12 @@
# All that you do to add a file is edit OBJS, the rest will just work
prefix = /usr
prefix_spu = $(prefix)/spu
DESTDIR =
OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \
is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \
sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
sinf4.o isgreaterd2.o sind2.o sincosd2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \
negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \
cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \
@@ -51,7 +52,7 @@ OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o
fmodd2.o remainderd2.o
INCLUDES_SPU = -I../
INCLUDES_SPU = -I. -I../common
CROSS_SPU = spu-
AR_SPU = $(CROSS_SPU)ar
@@ -66,6 +67,7 @@ INSTALL = install
MAKE_DEFS = \
prefix='$(prefix)' \
prefix_spu='$(prefix_spu)' \
DESTDIR='$(DESTDIR)' \
LIB_BASE='$(LIB_BASE)' \
LIB_NAME='$(LIB_NAME)' \
@@ -89,43 +91,28 @@ $(STATIC_LIB): $(OBJS)
$(RANLIB_SPU) $@
install: $(STATIC_LIB)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include
$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include/simdmath
$(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_spu)/include/simdmath/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/lib
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix_spu)/lib/$(STATIC_LIB)
clean:
cd tests; $(MAKE) $(MAKE_DEFS) clean
rm -f $(OBJS)
rm -f $(STATIC_LIB)
$(OBJS): ../simdmath.h
$(OBJS): ../common/simdmath.h
check: $(STATIC_LIB)
cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check
# Some Objects have special header files.
sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h
lldivu2.o lldivi2.o : lldiv.h
sinf4.o sind2.o sincosf4.o cosd2.o: ../common/simdmath/_sincos.h
lldivu2.o lldivi2.o : simdmath/_lldiv.h
%.o: %.c
%.o: ../common/%.c simdmath/%.h
$(CC_SPU) $(CFLAGS_SPU) -c $<
#----------
# C++
#----------
%.o: %.C
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cpp
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cc
$(CXX_SPU) $(CFLAGS_SPU) -c $<
%.o: %.cxx
$(CXX_SPU) $(CFLAGS_SPU) -c $<

View File

@@ -1,127 +0,0 @@
/* cosd2 - Computes the cosine of each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector double
cosd2 (vector double x)
{
vec_double2 xl,xl2,xl3,res;
vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
vec_float4 xf = spu_roundtf(xl);
vec_int4 q = spu_convts(xf,0);
q = spu_shuffle(q,q,copyEven);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_double2 qd = spu_extend(qf);
vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
// Check if |xl| is a really small number
//
vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0 + xl2 * (((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
// sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
//
vec_double2 ct0 = spu_mul(xl2,xl2);
vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
vec_double2 st4 = spu_madd(st2,ct0,st3);
vec_double2 ct5 = spu_mul(ct0,ct0);
vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
vec_double2 st6 = spu_madd(ct5,st1,st4);
vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
vec_double2 sx = spu_madd(st6,xl3,xl);
// Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
//
sx = spu_sel(sx,xl,isTiny);
cx = spu_sel(cx,spu_splats(1.0),isTiny);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 2 or 3, i.e. when the quadrant (q mod 4) is 1 or 2
//
vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
mask2 = spu_shuffle(mask2,mask2,copyEven);
res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
// if input = +/-Inf return NAN
//
res = spu_sel(res, nan, isnand2 (x));
// if input = 0 or denorm, return 1.0
//
vec_ullong2 zeroMask = is0denormd2 (x);
res = spu_sel(res,spu_splats(1.0),zeroMask);
return res;
}
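The quadrant scheme above can be restated in scalar C. The sketch below is illustrative only: it calls the standard-library sin/cos where the SPU code evaluates minimax polynomials, uses an assumed two-term split of pi/2 in place of _SINCOS_KC1D/_SINCOS_KC2D, and omits the tiny-angle, NaN, and zero/denorm special cases handled above.
#include <math.h>
static double cos_quadrant_sketch(double x)
{
    /* q = round-half-away-from-zero(x * 2/pi), the quadrant index */
    double xl = x * 0.63661977236758134307553505349005744;
    double q  = trunc(xl + copysign(0.5, xl));
    /* remainder in [-pi/4, pi/4], subtracted Cody-Waite style; the header's
       split of pi/2 may differ from the two constants assumed here */
    double r  = (x - q * 1.5707963267948966) - q * 6.123233995736766e-17;
    int offset = ((int)q + 1) & 3;
    /* odd offset -> cosine kernel, even offset -> sine kernel */
    double p = (offset & 1) ? cos(r) : sin(r);
    /* negate in quadrants 1 and 2, i.e. when (offset & 2) is set */
    return (offset & 2) ? -p : p;
}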

View File

@@ -1,94 +0,0 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector float
cosf4 (vector float x)
{
vec_float4 xl,xl2,xl3,res;
vec_int4 q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236f));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(xl))*sign(xl))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
q = spu_convts(xl,0);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
//
vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
vec_float4 sx = spu_madd(st2,xl3,xl);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 2 or 3, i.e. when the quadrant (q mod 4) is 1 or 2
//
vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
return res;
}
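The single-precision version uses the same reduction with three-term kernels; a scalar sketch of that Horner evaluation follows, with hypothetical arrays cc[] and sc[] standing in for the _SINCOS_CC0..CC2 and _SINCOS_SC0..SC2 constants from sincos_c.h.
/* Evaluate the cos and sin kernels on the reduced argument r, exactly as
   described in the comment above. */
static void sincos_poly_sketch(float r, const float cc[3], const float sc[3],
                               float *c, float *s)
{
    float r2 = r * r;   /* xl2 */
    float r3 = r2 * r;  /* xl3 */
    *c = 1.0f + r2 * ((cc[0] * r2 + cc[1]) * r2 + cc[2]);
    *s = r    + r3 * ((sc[0] * r2 + sc[1]) * r2 + sc[2]);
}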

View File

@@ -1,109 +0,0 @@
/* divi4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divi4_t divi4 (vector signed int numer, vector signed int denom)
{
divi4_t res;
vec_int4 quot, newQuot, shift;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denomAbs );
numerZeros = spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denomAbs, (vec_uint4)shift );
oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denomAbs, denomZeros );
oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros );
quot = spu_splats(0);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, (vec_int4)oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numerAbs );
newNum = spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos );
return res;
}
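The loop above is a restoring shift-subtract divider run across four lanes at once; per element the same algorithm looks like this scalar sketch (GCC's __builtin_clz stands in for spu_cntlz).
static void divi_sketch(int numer, int denom, int *quot, int *rem)
{
    unsigned n = numer < 0 ? 0u - (unsigned)numer : (unsigned)numer;
    unsigned d = denom < 0 ? 0u - (unsigned)denom : (unsigned)denom;
    unsigned q = 0;
    if (d != 0 && d <= n) {
        /* align the divisor's leading one with the dividend's */
        int shift = __builtin_clz(d) - __builtin_clz(n);
        for (; shift >= 0; --shift) {
            if ((d << shift) <= n) {   /* subtract whenever it fits */
                n -= d << shift;
                q |= 1u << shift;
            }
        }
    }
    *quot = ((numer < 0) != (denom < 0)) ? -(int)q : (int)q;
    *rem  = numer < 0 ? -(int)n : (int)n;   /* divide by zero: quot = 0, rem = numer */
}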

View File

@@ -1,94 +0,0 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed long long
fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
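Per element, the classification reduces to two bit tests on the exponent and mantissa fields; a scalar sketch:
#include <math.h>
#include <stdint.h>
#include <string.h>
static int fpclassify_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t expfield = bits & 0x7ff0000000000000ull;
    uint64_t mantissa = bits & 0x000fffffffffffffull;
    if (expfield == 0x7ff0000000000000ull)        /* all-ones exponent */
        return mantissa ? FP_NAN : FP_INFINITE;
    if (expfield == 0)                            /* zero exponent */
        return mantissa ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}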

View File

@@ -1,95 +0,0 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
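Per element the code rebuilds the fraction by forcing the exponent field to 0x3FE (so the magnitude lies in [0.5, 1)) and reports the exponent relative to that, normalizing subnormals first. A scalar sketch, which unlike the vector code scales subnormals with an exact multiply and writes *e = 0 for NaN, infinity and zero:
#include <stdint.h>
#include <string.h>
static double frexp_sketch(double x, int *e)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int expfield = (int)((bits >> 52) & 0x7ff);
    if (expfield == 0x7ff) { *e = 0; return x; }             /* NaN or infinity */
    if ((bits & ~(1ull << 63)) == 0) { *e = 0; return x; }   /* +0 or -0 */
    int adjust = 0;
    if (expfield == 0) {                 /* subnormal: make the implicit bit real */
        x *= 0x1p54;                     /* exact scaling by 2^54 */
        memcpy(&bits, &x, sizeof bits);
        expfield = (int)((bits >> 52) & 0x7ff);
        adjust = 54;
    }
    *e = expfield - 1022 - adjust;
    bits = (bits & ~0x7ff0000000000000ull) | 0x3fe0000000000000ull;
    memcpy(&x, &bits, sizeof bits);      /* fraction in [0.5, 1) with x's sign */
    return x;
}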

View File

@@ -1,84 +0,0 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN ((int)0x7FFFFFFF)
#endif
vector signed long long
ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
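Per element this is the exponent field minus the bias, with subnormals normalized by their leading-zero count; zero and infinity both map to FP_ILOGB0 here, NaN to FP_ILOGBNAN. A scalar sketch:
#include <math.h>
#include <stdint.h>
#include <string.h>
static int ilogb_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t mag = bits & ~(1ull << 63);
    int expfield = (int)(mag >> 52);
    uint64_t mantissa = mag & 0x000fffffffffffffull;
    if (expfield == 0x7ff) return mantissa ? FP_ILOGBNAN : FP_ILOGB0;
    if (mag == 0) return FP_ILOGB0;
    if (expfield != 0) return expfield - 1023;   /* normal: unbiased exponent */
    int shifts = 0;                              /* subnormal: normalize first */
    while (!(mantissa & (1ull << 52))) { mantissa <<= 1; ++shifts; }
    return -1022 - shifts;
}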

View File

@@ -1,123 +0,0 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LLDIV_H__
#define __LLDIV_H__
#include <spu_intrinsics.h>
static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x);
static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x);
static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long
ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
#endif // __LLDIV_H__
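These helpers build 64-bit operations out of the SPU's 32-bit primitives. The two less obvious compositions, a 64-bit greater-than and a 64-bit subtract with a borrow across the word boundary, look like this in scalar form (hypothetical high/low word arguments):
#include <stdint.h>
/* cf. ll_spu_cmpgt: the low-word compare only decides when the high words are equal */
static int u64_gt_sketch(uint32_t xh, uint32_t xl, uint32_t yh, uint32_t yl)
{
    return (xh > yh) || (xh == yh && xl > yl);
}
/* cf. ll_spu_sub: spu_genb/spu_subx carry the low-word borrow into the high word */
static void u64_sub_sketch(uint32_t xh, uint32_t xl, uint32_t yh, uint32_t yl,
                           uint32_t *rh, uint32_t *rl)
{
    uint32_t borrow = xl < yl;
    *rl = xl - yl;
    *rh = xh - yh - borrow;
}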

View File

@@ -1,128 +0,0 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
static inline vector signed long long _negatell2 (vector signed long long x);
static inline vector signed long long
_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos);
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs );
numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numerAbs );
newNum = ll_spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg);
res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos);
return res;
}
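_negatell2 above negates a 64-bit value one 32-bit word at a time; the borrow handling it implements with spu_genb/spu_subx is, in scalar terms (hypothetical high/low word representation):
#include <stdint.h>
/* Negate the low word, then subtract an extra one from the negated high
   word whenever the low word was non-zero. */
static void negate64_sketch(uint32_t hi, uint32_t lo, uint32_t *nhi, uint32_t *nlo)
{
    *nlo = 0u - lo;
    *nhi = 0u - hi - (lo != 0);
}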

View File

@@ -1,93 +0,0 @@
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALL
#define HUGE_VALL __builtin_huge_vall ()
#endif
#ifndef DBL_INF
#define DBL_INF ((long long)0x7FF0000000000000ull)
#endif
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf );
//HUGE_VAL: x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
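A scalar restatement of the cases selected above, written with the standard <math.h> helpers instead of bit tests: NaN passes through, infinity yields +Inf, zero yields -HUGE_VAL, and everything else the exponent of the normalized value.
#include <math.h>
static double logb_sketch(double x)
{
    if (isnan(x)) return x;            /* NaN in, NaN out */
    if (isinf(x)) return INFINITY;     /* all-ones exponent, zero mantissa */
    if (x == 0.0) return -HUGE_VAL;    /* the -HUGE_VALL selection above */
    return (double)ilogb(x);           /* unbiased exponent, subnormals normalized */
}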

View File

@@ -1,92 +0,0 @@
/* nextafterd2 - find the next representable floating-point value toward the second parameter.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nextafterd2 (vector double xx, vector double yy)
{
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
vec_uint4 is_sub, is_zerox, is_zeroy;
vec_uint4 is_equal, is_infy, is_nany;
vec_uint4 res0, res1, res;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
// mask sign bit
abs_x = spu_and( (vec_uint4)xx, msk_abs);
abs_y = spu_and( (vec_uint4)yy, msk_abs);
is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
// -0 exception
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
// if same sign |y| < |x| -> decrease
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
is_sub = spu_or(is_sub, abs_dif);
is_sub = spu_rlmaska(is_sub, -31);
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
res = spu_sel( res0, res1, is_sub); // select increase or decrease
res = spu_or( res, sign_x); // set sign
// check exception
// 0 -> -1
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
// check equal (include 0,-0)
is_zeroy = spu_cmpeq( abs_y, vec_zero);
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
res = spu_sel(res, (vec_uint4)yy, is_equal);
// check nan
is_infy = spu_cmpeq( abs_y, msk_exp);
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
is_nany = spu_and( abs_y, msk_exp);
is_nany = spu_cmpeq( is_nany, msk_exp);
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
is_nany = spu_sel( is_nany, vec_zero, is_infy);
res = spu_sel(res, (vec_uint4)yy, is_nany);
return (vec_double2)res;
}
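The increment/decrement above works because, within one sign, IEEE doubles order the same way as their bit patterns, so stepping toward y is a one-unit change to the magnitude bits. A scalar sketch, which like the vector code leaves a NaN x and overflow past infinity to the raw bit arithmetic:
#include <math.h>
#include <stdint.h>
#include <string.h>
static double nextafter_sketch(double x, double y)
{
    if (isnan(y)) return y;                  /* NaN direction propagates */
    if (x == y)  return y;                   /* equal, including 0 vs -0 */
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    if (x == 0.0) {                          /* step off zero: smallest subnormal */
        bits = 1;                            /* magnitude one ... */
        if (y < 0.0) bits |= 1ull << 63;     /* ... with the direction's sign */
    } else if ((x < y) == (x > 0.0)) {
        bits += 1;                           /* move away from zero */
    } else {
        bits -= 1;                           /* move toward zero */
    }
    memcpy(&x, &bits, sizeof bits);
    return x;
}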

View File

@@ -1,72 +0,0 @@
/* powf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
powf4 (vector float x, vector float y)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
vec_float4 res = exp2f4(spu_mul(y, log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), negmask);
return spu_sel(res, (vec_float4)zeros, zeromask);
}
/*
{
vec_int4 zeros = spu_splats(0);
vec_int4 ones = (vec_int4)spu_splats((char)0xFF);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
}
*/
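In scalar terms the strategy above is |x|^y = exp2(y * log2|x|), with the sign restored when x is negative and y truncates to an odd integer, and a forced zero for x == 0. A sketch (plain C conversion where the vector code uses the saturating spu_convtu):
#include <math.h>
static float pow_sketch(float x, float y)
{
    if (x == 0.0f) return 0.0f;              /* zeromask path above */
    float r = exp2f(y * log2f(fabsf(x)));
    unsigned y_trunc = (unsigned)fabsf(y);   /* truncated magnitude of y */
    if (x < 0.0f && (y_trunc & 1u)) r = -r;  /* odd integer exponent keeps x's sign */
    return r;
}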

View File

@@ -1,313 +0,0 @@
/* A vector double is returned that contains the remainder xi REM yi,
for the corresponding elements of vector double x and vector double y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remainderd2(vector double x, vector double yy)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x2
abs_2y = _twice(abs_yy); // 2 x y
/*
* use fmodd2 function
*/
// get remainder of y x2
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y);
{
vec_double2 y = (vec_double2)abs_2y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; _twice does not handle negative values)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
return ((vec_double2)result);
}
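// Hedged scalar sketch of the mantissa reduction loop above; the _mant_mod_ref
// helper and its name are illustrative only and not part of the library. With
// both significands normalized and carrying their implied leading 1, and n the
// exponent difference, the remainder of the significands is formed by n
// shift-and-conditional-subtract steps, i.e. binary long division that keeps
// only the running remainder.
static inline unsigned long long _mant_mod_ref(unsigned long long mant_x, unsigned long long mant_y, int n)
{
    while (n-- > 0) {
        if (mant_x >= mant_y)
            mant_x -= mant_y;   // subtract when it does not go negative
        mant_x <<= 1;           // then move on to the next quotient bit
    }
    if (mant_x >= mant_y)       // final, unshifted subtraction
        mant_x -= mant_y;
    return mant_x;
}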
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}

View File

@@ -1,107 +0,0 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
remainderf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, i, rem;
vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;
// Find i = truncated_integer(|x/y|)
// By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
// Thus the exact truncation is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
odd0 = odd2 = spu_nor( odd1, odd1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );
i = i0;
i = spu_sel( i1, i, cmp1 );
i = spu_sel( i2, i, cmp2 );
odd = odd0;
odd = spu_sel( odd1, odd, cmp1 );
odd = spu_sel( odd2, odd, cmp2 );
rem = spu_nmsub( i, yabs, xabs );
// Test whether i or i+1 = nearest_integer(|x/y|)
//
// i+1 is correct if:
//
// rem > 0.5*|y|
// or
// rem = 0.5*|y| and i is odd
yabshalf = spu_mul( yabs, spu_splats(0.5f) );
cmp1 = spu_cmpgt( rem, yabshalf );
cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );
i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
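// Hedged scalar sketch of the candidate selection above; the helper name is
// illustrative only and not part of the library, and the sketch ignores the
// rounding-error compensation the vector code applies when choosing between
// i-1, i and i+1. It rounds the quotient to nearest, ties to even, then forms
// x - i*y (assumes <math.h>).
#include <math.h>
static inline float _remainderf_slot_ref(float x, float y)
{
    float ax = fabsf(x), ay = fabsf(y);
    float i = truncf(ax / ay);                       // truncated quotient candidate
    float rem = ax - i * ay;                         // residue for that candidate
    if (rem > 0.5f * ay ||
        (rem == 0.5f * ay && fmodf(i, 2.0f) == 1.0f))
        i += 1.0f;                                   // move to the nearest (even) quotient
    i = copysignf(i, x / y);                         // quotient takes the sign of x/y
    return x - i * y;
}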

View File

@@ -1,356 +0,0 @@
/* remquod2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* This function returns the same vector double result as remainderd2().
* In addition a vector signed long long is stored in *quo,
* that contains the corresponding element values whose sign is
* the sign of xi / yi and whose magnitude is congruent modulo 2n to
* the magnitude of the integral quotient of xi / yi, where n is
* an implementation-defined integer greater than or equal to 3.
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remquod2(vector double x, vector double yy, vector signed long long *quo)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 quotient, quotient0;
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, not_ge, quo_pos, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1);
quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x8
abs_2y = _twice(abs_yy); // 2 x y
abs_4y = _twice(abs_2y); // 4 x y
abs_8y = _twice(abs_4y); // 8 x y
/*
* use fmodd2 function
*/
// get remainder of y x8
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y);
{
vec_double2 y = (vec_double2)abs_8y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// if 8*y overflows (a y exponent of 0x7FC-0x7FF reaches 0x7FF after the +3 from x8)
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
/* if (x >= 4*y)
* x -= 4*y
* quotient = 4
* else
* quotient = 0
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF));
not_ge = _vec_gt64(abs_4y, abs_x);
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge);
quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge);
/* if (x >= 2*y
* x -= 2*y
* quotient += 2
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF));
not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge);
quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // minus
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2 x x (sign cleared; _twice does not handle negative values)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3bit
quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4));
quotient = spu_sel(quotient0, quotient, quo_pos);
*quo = (vec_llong2)quotient;
return ((vec_double2)result);
}
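/* Hedged scalar model of what one slot of remquod2 computes, using the C99
 * remquo() from <math.h>, which likewise returns the IEEE remainder and stores
 * a sign-correct, low-bits-only quotient. The _remquod_slot_ref helper is
 * illustrative only and not part of the library.
 */
#include <math.h>
static inline double _remquod_slot_ref(double x, double y, int *quo)
{
    return remquo(x, y, quo);
}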
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}

View File

@@ -1,96 +0,0 @@
/* rsqrtd2 - for each of two double slots, compute reciprocal square root.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Handles exceptional values as follows:
// NaN -> NaN
// (+,-)0 -> (+,-)Inf
// +Inf -> +0
// -Inf -> NaN
// -Finite -> NaN
// Denormal inputs are treated as zero.
vector double rsqrtd2 (vector double x)
{
vec_ullong2 expmask, onemask, signmask, evenexp;
vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result;
vec_float4 halff, onef, manf, y0f, y1f;
expmask = spu_splats(0x7ff0000000000000ull);
onemask = spu_splats(0x0010000000000000ull);
signmask = spu_splats(0x8000000000000000ull);
onef = spu_splats(1.0f);
one = spu_extend( onef );
halff = spu_splats(0.5f);
half = spu_extend( halff );
// Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
// where i = 0 when exponent is even and i = 1 when exponent is odd.
//
// Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
//
// biased_exp = 1023 + exponent
// new_biased_exp = 1023 - (exponent+i)/2
// = 1023 - (biased_exp-1023+i)/2
// = (3069 - (biased_exp+i)) / 2
evenexp = spu_and( (vec_ullong2)x, onemask );
man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );
exp = spu_and( x, (vec_double2)expmask );
nexp = spu_or( exp, (vec_double2)onemask );
nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );
// Compute mantissa part in single precision.
// Convert back to double and multiply with 2^(-(exponent+i)/2), then
// do two Newton-Raphson steps for full precision.
manf = spu_roundtf( man );
y0f = spu_rsqrte( manf );
y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f );
y1 = spu_mul( spu_extend( y1f ), nexp );
y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 );
y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 );
// Choose iterated result or special value.
zero = spu_and( x, (vec_double2)signmask );
inf = spu_sel( (vec_double2)expmask, x, signmask );
nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
result = spu_sel( y3, zero, isinfd2 ( x ) );
result = spu_sel( result, nan, signbitd2 ( x ) );
result = spu_sel( result, inf, is0denormd2 ( x ) );
return result;
}
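// Hedged scalar form of the Newton-Raphson step the code above applies twice
// in double precision: y_{n+1} = y_n + 0.5*y_n*(1 - x*y_n*y_n), which
// converges quadratically to 1/sqrt(x) from the single-precision estimate.
// The _rsqrt_nr_step_ref helper is illustrative only and not part of the library.
static inline double _rsqrt_nr_step_ref(double x, double y)
{
    return y + 0.5 * y * (1.0 - x * y * y);
}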

View File

@@ -0,0 +1,116 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIV_H___
#define ___SIMD_MATH_LLDIV_H___
#include <spu_intrinsics.h>
static inline vector unsigned long long
__ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
__ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
__ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
__ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
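/* Hedged scalar model of the 64-bit count-leading-zeros composed from 32-bit
 * counts, as __ll_spu_cntlz builds it: when the upper word is all zero (its
 * count saturates at 32), the lower word's count is added on top. The *_ref
 * helpers below are illustrative only and not part of the library.
 */
#include <stdint.h>
static inline unsigned int __ll_clz32_ref(uint32_t x)
{
    unsigned int n = 0;
    if (x == 0) return 32;
    while (!(x & 0x80000000u)) { x <<= 1; ++n; }
    return n;
}
static inline unsigned int __ll_clz64_ref(uint64_t x)
{
    uint32_t hi = (uint32_t)(x >> 32);
    return hi ? __ll_clz32_ref(hi) : 32 + __ll_clz32_ref((uint32_t)x);
}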
#endif // ___SIMD_MATH_LLDIV_H___

View File

@@ -0,0 +1,84 @@
/* Internal helper routines for the vector double remainder family.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__REMAINDER_H___
#define ___SIMD_MATH__REMAINDER_H___
#include <simdmath/_vec_utils.h>
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4
__rem_twice_d(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero << 1 , if norm exp + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4
__rem_sub_d(vec_uint4 aa, vec_uint4 bb)
{
// which is bigger input aa or bb
vec_uint4 is_bigb = __vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
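/* Hedged scalar model of __rem_twice_d on the raw bit pattern of a
 * non-negative, finite double (sign already cleared, as in the callers).
 * Normal numbers are doubled by adding 1 to the biased exponent; denormals and
 * zero are doubled by shifting the whole pattern left, which lets the leading
 * mantissa bit walk into the exponent field across the denormal boundary.
 * The __rem_twice_bits_ref helper is illustrative only and not part of the library.
 */
#include <stdint.h>
static inline uint64_t __rem_twice_bits_ref(uint64_t bits)
{
    const uint64_t mant_mask = 0x000FFFFFFFFFFFFFULL;
    return (bits > mant_mask) ? bits + 0x0010000000000000ULL : bits << 1;
}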
#endif

View File

@@ -0,0 +1,57 @@
/* Common types for SPU SIMD Math Library
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__VEC_UTILS_H___
#define ___SIMD_MATH__VEC_UTILS_H___
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4
__vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4
__vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = __vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4
__vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
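/* Hedged scalar form of the comparison __vec_gt64_half emulates with 32-bit
 * vector compares: the high words decide unless they are equal, in which case
 * the low words decide. The __gt64_ref helper is illustrative only and not
 * part of the library.
 */
#include <stdint.h>
static inline int __gt64_ref(uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo)
{
    return (a_hi > b_hi) || (a_hi == b_hi && a_lo > b_lo);
}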
#endif

View File

@@ -27,14 +27,18 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ABSI4_H___
#define ___SIMD_MATH_ABSI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
absi4 (vector signed int x)
static inline vector signed int
_absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}
#endif

View File

@@ -27,52 +27,56 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ACOSF4_H___
#define ___SIMD_MATH_ACOSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtf4.h>
//
// Computes the inverse cosine of all four slots of x
//
vector float
acosf4 (vector float x)
static inline vector float
_acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
t1 = _sqrtf4(spu_sub( spu_splats(1.0f), xabs));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
result = spu_madd(hi, xabs4, lo);
result = spu_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
*/
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
/* Adjust the result if x is negative.
*/
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
result = spu_sel(pos, neg, select);
result = spu_sel(pos, neg, select);
return result;
return result;
}
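// Hedged scalar form of the split polynomial evaluation above, with the
// coefficients copied from the vector code; the hi and lo halves carry no data
// dependence on each other and are recombined through |x|^4. _acosf4 then
// scales by sqrt(1 - |x|) and reflects about pi for negative inputs. The
// _acos_poly_ref helper is illustrative only and not part of the library.
static inline float _acos_poly_ref(float xabs)
{
    float x2 = xabs * xabs;
    float x4 = x2 * x2;
    float hi = ((-0.0012624911f * xabs + 0.0066700901f) * xabs - 0.0170881256f) * xabs + 0.0308918810f;
    float lo = ((-0.0501743046f * xabs + 0.0889789874f) * xabs - 0.2145988016f) * xabs + 1.5707963050f;
    return hi * x4 + lo;
}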
#endif

View File

@@ -27,59 +27,66 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ASINF4_H___
#define ___SIMD_MATH_ASINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
asinf4 (vector float x)
#include <simdmath/sqrtf4.h>
#include <simdmath/divf4.h>
static inline vector float
_asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
// positive = (x > 0)
//
vec_uint4 positive = spu_cmpgt(x,spu_splats(0.0f));
// gtHalf = (|x| > 0.5)
//
vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
// gtHalf = (|x| > 0.5)
//
vec_uint4 gtHalf = spu_cmpabsgt(x,spu_splats(0.5f));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),_sqrtf4(g)),gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// x = x + num / denom
//
x = spu_add(x,divf4(num,denom));
// x = x + num / denom
//
x = spu_add(x,_divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
return x;
return x;
}
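// Hedged scalar form of the core rational step above for the |x| <= 0.5 path,
// with the constants copied from the vector code; the |x| > 0.5 path first
// substitutes x = -2*sqrt(0.5 - 0.5*x) and adds pi/2 afterwards. The
// _asin_core_ref helper is illustrative only and not part of the library.
static inline float _asin_core_ref(float x)
{
    float g = x * x;
    float num = x * g * (-0.504400557f * g + 0.933933258f);
    float den = (g - 5.54846723f) * g + 5.603603363f;
    return x + num / den;
}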
#endif

View File

@@ -27,34 +27,40 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATAN2F4_H___
#define ___SIMD_MATH_ATAN2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/atanf4.h>
#include <simdmath/divf4.h>
//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
static inline vector float
_atan2f4 (vector float y, vector float x)
{
vec_float4 res = atanf4(divf4(y,x));
vec_float4 res = _atanf4(_divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uint4 yNeg = spu_cmpgt(spu_splats(0.0f),y);
vec_uint4 xNeg = spu_cmpgt(spu_splats(0.0f),x);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 newRes = spu_add(bias, res);
vec_float4 newRes = spu_add(bias, res);
res = spu_sel(res,newRes,xNeg);
res = spu_sel(res,newRes,xNeg);
return res;
return res;
}
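// Hedged scalar model of the quadrant fix-up above, assuming x != 0:
// atan(y/x) lands in (-pi/2, pi/2) and is shifted by +/- pi when x is
// negative, with the sign taken from y. The _atan2_ref helper is illustrative
// only and not part of the library (uses <math.h>).
#include <math.h>
static inline float _atan2_ref(float y, float x)
{
    float res = atanf(y / x);
    if (x < 0.0f)
        res += (y < 0.0f) ? -3.14159265358979323846f : 3.14159265358979323846f;
    return res;
}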
#endif

View File

@@ -27,50 +27,55 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATANF4_H___
#define ___SIMD_MATH_ATANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/recipf4.h>
//
// Computes the inverse tangent of all four slots of x.
//
vector float
atanf4 (vector float x)
static inline vector float
_atanf4 (vector float x)
{
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = _recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
x = spu_sel(x, inv_x, select);
x = spu_sel(x, inv_x, select);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
result = spu_madd(hi, x9, lo);
return result;
return result;
}
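// Hedged scalar form of the range reduction above, using the identity
// atan(x) = sign(x)*pi/2 + atan(-1/x) for |x| > 1 so the polynomial only ever
// sees arguments in [-1, 1]. The _atan_reduce_ref helper is illustrative only
// and not part of the library (uses <math.h>).
#include <math.h>
static inline float _atan_reduce_ref(float x)
{
    if (fabsf(x) > 1.0f)
        return copysignf(1.57079632679489661923f, x) + atanf(-1.0f / x);
    return atanf(x);
}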
#endif

View File

@@ -27,79 +27,69 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CBRTF4_H___
#define ___SIMD_MATH_CBRTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/frexpf4.h>
#include <simdmath/ldexpf4.h>
#include <simdmath/divf4.h>
#define __calcQuot(xexp) n = xexp; \
vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
n = spu_sel(n, spu_add(n,2), negxexpmask); \
\
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
quot = spu_add(quot, spu_rlmaska(quot, -4)); \
quot = spu_add(quot, spu_rlmaska(quot, -8)); \
quot = spu_add(quot, spu_rlmaska(quot,-16)); \
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
quot = spu_add( \
quot, \
spu_rlmaska( \
spu_add( \
spu_add(r,5), \
spu_sl (r,2) \
), \
-4 \
) \
); \
static inline vec_int4
__cbrtf4_calc_quot(vec_int4 n)
{
vec_int4 quot;
vec_uint4 negxexpmask = spu_cmpgt(spu_splats(0), n);
n = spu_sel(n, spu_add(n,2), negxexpmask);
#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4));
quot = spu_add(quot, spu_rlmaska(quot, -4));
quot = spu_add(quot, spu_rlmaska(quot, -8));
quot = spu_add(quot, spu_rlmaska(quot,-16));
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1));
quot = spu_add(quot, spu_rlmaska(spu_add(spu_add(r,5), spu_sl (r,2)), -4));
return quot;
}
vector float
cbrtf4 (vector float x)
#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3)
#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
static inline vector float
_cbrtf4 (vector float x)
{
vec_float4 zeros = spu_splats(0.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
vec_int4 xexp, n;
vec_uint4 zeromask = spu_cmpeq(x, zeros);
vec_int4 xexp;
vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
x = spu_and(x, sgnmask);
x = frexpf4(x, &xexp);
x = _frexpf4(x, &xexp);
vec_float4 p = spu_madd(
spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
x,
spu_splats(0.492659620528969547f)
);
vec_float4 p3 = spu_mul(p, spu_mul(p, p));
vec_int4 quot;
__calcQuot(xexp);
vec_int4 quot = __cbrtf4_calc_quot(xexp);
vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
vec_float4 factor = spu_splats((float)(1.0/__CBRTF_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/__CBRTF_cbrt2)), spu_cmpeq(modval,-1));
factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
factor = spu_sel(factor, spu_splats((float)( __CBRTF_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(__CBRTF_sqr_cbrt2)), spu_cmpeq(modval, 2));
vec_float4 pre = spu_mul(p, factor);
vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
vec_float4 res = spu_mul(pre, divf4(numr, denr));
res = ldexpf4(res, quot);
vec_float4 res = spu_mul(pre, _divf4(numr, denr));
res = _ldexpf4(res, quot);
return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
zeros,
zeromask);
}
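// Hedged scalar model of the exponent split used above. With x = m * 2^e
// (m from frexpf), any integer quot with mod = e - 3*quot satisfies
// cbrt(x) = cbrt(m) * 2^(mod/3) * 2^quot; the vector code picks quot near e/3
// so that mod stays in {-2,...,2} and the 2^(mod/3) factor comes from a small
// constant table. Assumes x > 0. The _cbrt_split_ref helper is illustrative
// only and not part of the library (uses <math.h>).
#include <math.h>
static inline float _cbrt_split_ref(float x)
{
    int e, quot, mod;
    float m = frexpf(x, &e);
    quot = e / 3;
    mod = e - 3 * quot;
    return ldexpf(cbrtf(m) * powf(2.0f, (float)mod / 3.0f), quot);
}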
/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), neg);
return res;
}
*/
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILD2_H___
#define ___SIMD_MATH_CEILD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ceild2(vector double in)
static inline vector double
_ceild2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -83,7 +86,7 @@ ceild2(vector double in)
insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
@@ -92,3 +95,5 @@ ceild2(vector double in)
return (out);
}
#endif

View File

@@ -27,28 +27,32 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILF4_H___
#define ___SIMD_MATH_CEILF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ceilf4 (vector float x)
static inline vector float
_ceilf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one greater.
// Find truncated value and one greater.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is less than input, add one.
// If truncated value is less than input, add one.
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}
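// Hedged scalar model of the strategy above. The 0x4b000000 compare
// corresponds to |x| < 2^23, the range in which the int round-trip truncates
// exactly; outside it every float is already integral and x is returned
// unchanged. The _ceil_ref helper is illustrative only and not part of the library.
static inline float _ceil_ref(float x)
{
    if (!(x < 8388608.0f && x > -8388608.0f))
        return x;                        // |x| >= 2^23 (or NaN): already integral
    float truncated = (float)(int)x;     // truncate toward zero
    return (x > truncated) ? truncated + 1.0f : truncated;
}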
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGND2_H___
#define ___SIMD_MATH_COPYSIGND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double copysignd2 (vector double x, vector double y)
static inline vector double
_copysignd2 (vector double x, vector double y)
{
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGNF4_H___
#define ___SIMD_MATH_COPYSIGNF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
copysignf4 (vector float x, vector float y)
static inline vector float
_copysignf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_splats(0x80000000) );
return spu_sel( x, y, spu_splats(0x80000000) );
}
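// Hedged scalar form of the single bit-select above: keep everything from x
// except the sign bit, which is taken from y. The _copysignf_ref helper is
// illustrative only and not part of the library.
#include <stdint.h>
#include <string.h>
static inline float _copysignf_ref(float x, float y)
{
    uint32_t xb, yb;
    memcpy(&xb, &x, sizeof xb);
    memcpy(&yb, &y, sizeof yb);
    xb = (xb & 0x7fffffffu) | (yb & 0x80000000u);
    memcpy(&x, &xb, sizeof x);
    return x;
}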
#endif

View File

@@ -0,0 +1,46 @@
/* cosd2 - Computes the cosine of the each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSD2_H___
#define ___SIMD_MATH_COSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosd2.h>
static inline vector double
_cosd2 (vector double x)
{
vec_double2 s, c;
_sincosd2(x, &s, &c);
return c;
}
#endif

View File

@@ -0,0 +1,46 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSF4_H___
#define ___SIMD_MATH_COSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosf4.h>
static inline vector float
_cosf4 (vector float x)
{
vec_float4 s, c;
_sincosf4(x, &s, &c);
return c;
}
#endif

View File

@@ -27,15 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVD2_H___
#define ___SIMD_MATH_DIVD2_H___
// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
divd2 (vector double numer, vector double denom)
#include <simdmath/recipd2.h>
static inline vector double
_divd2 (vector double numer, vector double denom)
{
return spu_mul( numer, recipd2( denom ) );
return spu_mul( numer, _recipd2( denom ) );
}
#endif

View File

@@ -27,20 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVF4_H___
#define ___SIMD_MATH_DIVF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
divf4 (vector float numer, vector float denom)
static inline vector float
_divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}
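// Hedged scalar form of the refinement above. With y0 close to 1/denom from
// the estimate instruction, one Newton-Raphson step folded around the
// numerator gives q = numer*y0 + (1 - denom*y0)*(numer*y0); the 1.0 + 1 ulp
// "oneish" constant nudges the final rounding. The _div_nr_ref helper is
// illustrative only and not part of the library.
static inline float _div_nr_ref(float numer, float denom, float y0)
{
    float y0numer = numer * y0;
    return (1.0f - denom * y0) * y0numer + y0numer;
}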
#endif

View File

@@ -0,0 +1,67 @@
/* divi4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVI4_H___
#define ___SIMD_MATH_DIVI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divu4.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline divi4_t
_divi4 (vector signed int numer, vector signed int denom)
{
divu4_t resAbs;
divi4_t res;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
resAbs = _divu4(numerAbs, denomAbs);
res.quot = spu_sel( (vec_int4)resAbs.quot, spu_sub( 0, (vec_int4)resAbs.quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)resAbs.rem ), (vec_int4)resAbs.rem, numerPos );
return res;
}
#endif
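_divi4 reuses the unsigned routine: it divides absolute values, negates the quotient when exactly one operand is negative, and gives the remainder the sign of the numerator. A minimal scalar sketch of that sign fixup in plain C, with divide-by-zero handled as the comment above describes:

#include <stdio.h>

typedef struct { int quot, rem; } divi_result;

/* Signed divide built on an unsigned divide, as in _divi4:
   quotient is negated when the operand signs differ,
   remainder takes the sign of the numerator. */
static divi_result div_signed(int numer, int denom)
{
    unsigned un = numer < 0 ? 0u - (unsigned)numer : (unsigned)numer;
    unsigned ud = denom < 0 ? 0u - (unsigned)denom : (unsigned)denom;
    unsigned q = ud ? un / ud : 0u;     /* divide by zero -> quot = 0    */
    unsigned r = ud ? un % ud : un;     /* divide by zero -> rem = numer */
    divi_result res;
    res.quot = ((numer < 0) != (denom < 0)) ? -(int)q : (int)q;
    res.rem  = (numer < 0) ? -(int)r : (int)r;
    return res;
}

int main(void)
{
    divi_result r = div_signed(-7, 2);
    printf("quot=%d rem=%d\n", r.quot, r.rem);   /* quot=-3 rem=-1 */
    return 0;
}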

View File

@@ -27,44 +27,48 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVU4_H___
#define ___SIMD_MATH_DIVU4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
static inline divu4_t
_divu4 (vector unsigned int numer, vector unsigned int denom)
{
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
// Shift left all leading zeros.
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
quot = spu_splats(0U);
quot = spu_splats(0U);
do
{
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
@@ -87,11 +91,12 @@ divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
res.quot = quot;
res.rem = numer;
return res;
}
#endif
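_divu4 is a restoring shift-subtract divider: the denominator is shifted left until its leading one lines up with the numerator's, then the loop subtracts and shifts while accumulating quotient bits; a zero denominator yields quotient 0 and remainder equal to the numerator. The same loop in scalar C (clz32 is a portable stand-in for spu_cntlz):

#include <stdio.h>

/* Count leading zeros of a 32-bit value. */
static int clz32(unsigned x)
{
    int n = 32;
    while (x) { x >>= 1; n--; }
    return n;
}

/* Restoring shift-subtract division, mirroring the _divu4 loop:
   align denom's leading one with numer's, then subtract and shift. */
static void divu_shift_subtract(unsigned numer, unsigned denom,
                                unsigned *quot, unsigned *rem)
{
    *quot = 0;
    if (denom == 0) { *rem = numer; return; }     /* quot = 0, rem = numer */
    int shift = clz32(denom) - clz32(numer);
    if (shift < 0) { *rem = numer; return; }      /* denom > numer */
    unsigned d = denom << shift;
    unsigned one = 1u << shift;
    while (one) {
        if (numer >= d) { numer -= d; *quot |= one; }
        d >>= 1;
        one >>= 1;
    }
    *rem = numer;
}

int main(void)
{
    unsigned q, r;
    divu_shift_subtract(100, 7, &q, &r);
    printf("q=%u r=%u\n", q, r);   /* q=14 r=2 */
    return 0;
}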

View File

@@ -27,6 +27,8 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXP2F4_H___
#define ___SIMD_MATH_EXP2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -72,10 +74,10 @@
*/
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
#define __EXP2F_LN2 0.69314718055995f /* ln(2) */
vector float
exp2f4 (vector float x)
static inline vector float
_exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow, underflow;
@@ -91,7 +93,7 @@ exp2f4 (vector float x)
bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
frac = spu_mul(frac, spu_splats(__EXP2F_LN2));
// !!! HRD Replacing the unclear and incorrect overflow handling code
//overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
@@ -99,7 +101,7 @@ exp2f4 (vector float x)
underflow = spu_cmpgt(spu_splats(-126.0f), x);
//exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
exp_int = spu_add(exp_int, exp_int); // !!! HRD
@@ -123,9 +125,11 @@ exp2f4 (vector float x)
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), underflow);
//result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
return (result);
}
#endif
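exp2f4 splits x into an integer part n and a small fractional part, builds 2^n by writing the biased exponent straight into the IEEE-754 exponent field, approximates the fractional factor with a polynomial, and multiplies the two; the overflow and underflow selects clamp the result afterwards. A rough scalar sketch of that split (the library exp2f call below merely stands in for the polynomial, and over/underflow of n is not handled):

#include <stdio.h>
#include <math.h>
#include <string.h>

/* 2^x = 2^n * 2^f, with n the nearest integer and |f| <= 0.5.
   2^n is built by placing (n + 127) into the exponent field,
   as exp2f4 does with spu_sl(spu_add(ix, ...), 23). */
static float exp2_split(float x)
{
    int n = (int)floorf(x + 0.5f);              /* round to nearest integer */
    float f = x - (float)n;                     /* fractional remainder */
    unsigned bits = (unsigned)(n + 127) << 23;  /* 2^n, assuming no over/underflow */
    float pow2n;
    memcpy(&pow2n, &bits, sizeof pow2n);
    return pow2n * exp2f(f);                    /* exp2f(f) stands in for the polynomial */
}

int main(void)
{
    printf("%f vs %f\n", exp2_split(5.3f), exp2f(5.3f));
    return 0;
}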

View File

@@ -27,37 +27,44 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPF4_H___
#define ___SIMD_MATH_EXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#include <simdmath/divf4.h>
#include <simdmath/ldexpf4.h>
#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
#define __EXPF_C1 -0.6931470632553101f
#define __EXPF_C2 -1.1730463525082e-7f
vector float
expf4 (vector float x)
#define __EXPF_INVLN2 1.4426950408889634f
static inline vector float
_expf4 (vector float x)
{
vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
vec_uint4 xnegmask = spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats(0.5f),spu_splats(-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(__EXPF_INVLN2));
vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
g = spu_convtf(xexp, 0);
g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
g = spu_madd(g, spu_splats(__EXPF_C2), spu_madd(g, spu_splats(__EXPF_C1), x));
vec_float4 z = spu_mul(g, g);
vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f));
vec_float4 a = spu_mul(z, spu_splats(0.0999748594f));
vec_float4 b = spu_mul(g,
spu_madd(z,
spu_splats((float)0.0083208258f),
spu_splats((float)0.4999999992f)
spu_splats(0.0083208258f),
spu_splats(0.4999999992f)
)
);
vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
vec_float4 foo = _divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
return ldexpf4(foo, xexp);
return _ldexpf4(foo, xexp);
}
#endif
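_expf4 reduces the argument with n = round(x / ln 2) and g = x - n*ln 2, where the two constants __EXPF_C1 and __EXPF_C2 carry ln 2 as a high part plus a low-order correction so the subtraction keeps full precision; e^g is then approximated by a small rational function and the result scaled by 2^n through ldexp. A scalar sketch of the reduction (expf below stands in for the rational approximation):

#include <stdio.h>
#include <math.h>

#define C1 -0.6931470632553101f     /* high part of -ln(2)  */
#define C2 -1.1730463525082e-7f     /* low-order correction */
#define INVLN2 1.4426950408889634f  /* 1/ln(2)              */

/* e^x = 2^n * e^g with |g| <= ln(2)/2.  Subtracting n*ln2 in two
   pieces (C1, C2) keeps the reduced argument accurate. */
static float exp_reduced(float x)
{
    int n = (int)(x * INVLN2 + (x < 0 ? -0.5f : 0.5f));   /* round to nearest */
    float g = ((float)n * C1 + x) + (float)n * C2;        /* g = x - n*ln2 */
    return ldexpf(expf(g), n);     /* expf(g) stands in for the rational approx */
}

int main(void)
{
    printf("%f vs %f\n", exp_reduced(3.7f), expf(3.7f));
    return 0;
}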

View File

@@ -27,28 +27,36 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPMLF4_H___
#define ___SIMD_MATH_EXPMLF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
#include <simdmath/expf4.h>
#include <simdmath/divf4.h>
vector float
expm1f4 (vector float x)
#define __EXPM1F_ln1by2 -0.6931471805599f
#define __EXPM1F_ln3by2 0.4054651081082f
static inline vector float
_expm1f4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(__EXPM1F_ln1by2)),
spu_cmpgt(spu_splats(__EXPM1F_ln3by2), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
d0 = spu_madd(x , spu_splats(-0.3203561199f), spu_splats(0.9483177697f));
d1 = spu_madd(x2, spu_splats(0.0326527809f), d0);
n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
n0 = spu_madd(x , spu_splats(0.1538026623f), spu_splats(0.9483177732f));
n1 = spu_madd(x , spu_splats(0.0024490478f), spu_splats(0.0305274668f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
spu_mul(x, divf4(n1, d1)),
return spu_sel(spu_sub(_expf4(x), spu_splats(1.0f)),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSD2_H___
#define ___SIMD_MATH_FABSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double fabsd2 (vector double x)
static inline vector double
_fabsd2 (vector double x)
{
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSF4_H___
#define ___SIMD_MATH_FABSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float fabsf4 (vector float x)
static inline vector float
_fabsf4 (vector float x)
{
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}
#endif
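Both _fabsd2 and _fabsf4 take the absolute value by clearing the IEEE-754 sign bit with a single and-with-complement, no branches or compares. The same bit trick in scalar C:

#include <stdio.h>
#include <string.h>

/* fabs by clearing bit 31 of the float's bit pattern, as _fabsf4
   does with spu_andc(x, 0x80000000). */
static float fabs_bits(float x)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    u &= 0x7fffffffu;            /* drop the sign bit */
    memcpy(&x, &u, sizeof x);
    return x;
}

int main(void)
{
    printf("%f %f\n", fabs_bits(-3.5f), fabs_bits(2.0f));   /* 3.500000 2.000000 */
    return 0;
}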

View File

@@ -27,13 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMD2_H___
#define ___SIMD_MATH_FDIMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/* fdimd2 - compute the positive difference of x and y.
*/
vector double
fdimd2 (vector double x, vector double y)
static inline vector double
_fdimd2 (vector double x, vector double y)
{
vec_double2 v;
vec_uint4 mask;
@@ -44,3 +47,5 @@ fdimd2 (vector double x, vector double y)
return (v);
}
#endif

View File

@@ -27,12 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMF4_H___
#define ___SIMD_MATH_FDIMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fdimf4 (vector float x, vector float y)
static inline vector float
_fdimf4 (vector float x, vector float y)
{
vec_float4 diff = spu_sub(x,y);
return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORD2_H___
#define ___SIMD_MATH_FLOORD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
floord2(vector double in)
static inline vector double
_floord2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -74,7 +77,7 @@ floord2(vector double in)
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
@@ -92,3 +95,5 @@ floord2(vector double in)
return (out);
}
#endif

View File

@@ -27,28 +27,32 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORF4_H___
#define ___SIMD_MATH_FLOORF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
floorf4 (vector float x)
static inline vector float
_floorf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one less.
// Find truncated value and one less.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is greater than input, subtract one.
// If truncated value is greater than input, subtract one.
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}
#endif
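_floorf4 truncates toward zero, also forms "truncated minus one", and keeps the latter whenever truncation overshot the input, which only happens for negative non-integers; values with magnitude of at least 2^23 are already integral and pass through via the inrange mask. A scalar rendering of that truncate-and-correct idea:

#include <stdio.h>

/* floor via truncate-and-correct, mirroring _floorf4:
   take trunc(x) and trunc(x)-1, keep the one that is <= x. */
static float floor_trunc(float x)
{
    if (x >= 8388608.0f || x <= -8388608.0f)   /* |x| >= 2^23: already integral */
        return x;
    float t  = (float)(int)x;                  /* truncation toward zero */
    float t1 = t - 1.0f;
    return (t > x) ? t1 : t;                   /* overshoot only for negative non-integers */
}

int main(void)
{
    printf("%f %f %f\n", floor_trunc(2.7f), floor_trunc(-2.7f), floor_trunc(-3.0f));
    /* 2.000000 -3.000000 -3.000000 */
    return 0;
}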

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAD2_H___
#define ___SIMD_MATH_FMAD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
fmad2 (vector double x, vector double y, vector double z)
static inline vector double
_fmad2 (vector double x, vector double y, vector double z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -27,12 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAF4_H___
#define ___SIMD_MATH_FMAF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaf4 (vector float x, vector float y, vector float z)
static inline vector float
_fmaf4 (vector float x, vector float y, vector float z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -27,6 +27,8 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXD2_H___
#define ___SIMD_MATH_FMAXD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -36,8 +38,8 @@
* is returned.
*/
vector double
fmaxd2 (vector double x, vector double y)
static inline vector double
_fmaxd2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
@@ -66,3 +68,4 @@ fmaxd2 (vector double x, vector double y)
return spu_sel(x, y, selector);
}
#endif

View File

@@ -27,14 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXF4_H___
#define ___SIMD_MATH_FMAXF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaxf4 (vector float x, vector float y)
static inline vector float
_fmaxf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( y, x ) );
return spu_sel( x, y, spu_cmpgt( y, x ) );
}
#endif

View File

@@ -27,6 +27,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMIND2_H___
#define ___SIMD_MATH_FMIND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
* is returned.
*/
vector double
fmind2 (vector double x, vector double y)
static inline vector double
_fmind2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
@@ -65,3 +68,4 @@ fmind2 (vector double x, vector double y)
return spu_sel(x, y, selector);
}
#endif

View File

@@ -27,14 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMINF4_H___
#define ___SIMD_MATH_FMINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fminf4 (vector float x, vector float y)
static inline vector float
_fminf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( x, y ) );
return spu_sel( x, y, spu_cmpgt( x, y ) );
}
#endif

View File

@@ -27,10 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODD2_H___
#define ___SIMD_MATH_FMODD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_vec_utils.h>
/*
* a vector is returned that contains the remainder of xi/yi,
* for corresponding elements of vector double x and vector double y,
@@ -41,11 +45,8 @@
* magnitude less than |yi|
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);
vector double fmodd2(vector double x, vector double y)
static inline vector double
_fmodd2(vector double x, vector double y)
{
int shift0, shift1;
vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
@@ -82,20 +83,20 @@ vector double fmodd2(vector double x, vector double y)
exp_y = spu_rlmask(y_hi, -20);
// y>x
resultx = _vec_gt64(abs_y, abs_x);
resultx = __vec_gt64(abs_y, abs_x);
//is Inf, is Nan
x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_inf = __vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_nan = spu_andc(x_7ff, x_inf);
y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_inf = __vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_nan = spu_andc(y_7ff, y_inf);
// is zero
zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
zero_x = __vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = __vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
/* Determine ilogb of abs_x and abs_y and
@@ -121,8 +122,8 @@ vector double fmodd2(vector double x, vector double y)
cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
/*
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
*/
//norm
mant_x_norm = spu_or(implied_1, frac_x);
@@ -225,8 +226,8 @@ vector double fmodd2(vector double x, vector double y)
shift0 = spu_extract(cnt, 0);
shift1 = spu_extract(cnt, 2);
/*
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
*/
norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
@@ -236,11 +237,11 @@ vector double fmodd2(vector double x, vector double y)
//denorm
/*
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
*/
shift = spu_add(power, -1);
shift0 = spu_extract(shift, 0);
@@ -278,25 +279,4 @@ vector double fmodd2(vector double x, vector double y)
return ((vec_double2)result);
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only higher is right
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only higher is right
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
#endif
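The helpers removed here (now provided as __vec_gt64 and friends via _vec_utils.h) widen the 32-bit compares to 64 bits: the high words decide, and on a tie the low-word comparison, rotated into place with spu_rlqwbyte, is used instead. The same logic written scalar-style over explicit halves:

#include <stdio.h>

/* 64-bit unsigned greater-than assembled from 32-bit halves,
   the idea behind __vec_gt64_half: the high words decide, and
   on a tie the low-word comparison is used. */
static int gt64_from_halves(unsigned a_hi, unsigned a_lo,
                            unsigned b_hi, unsigned b_lo)
{
    int gt_hi = a_hi > b_hi;
    int eq_hi = a_hi == b_hi;
    int gt_lo = a_lo > b_lo;
    return gt_hi || (eq_hi && gt_lo);
}

int main(void)
{
    printf("%d %d %d\n",
           gt64_from_halves(1, 0, 0, 0xffffffffu),   /* 1 */
           gt64_from_halves(1, 2, 1, 3),             /* 0 */
           gt64_from_halves(1, 4, 1, 3));            /* 1 */
    return 0;
}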

View File

@@ -27,60 +27,68 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODF4_H___
#define ___SIMD_MATH_FMODF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
fmodf4 (vector float x, vector float y)
static inline vector float
_fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
// Find i = truncated_integer(|x/y|)
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
q = _divf4( x, y );
xabs = _fabsf4( x );
yabs = _fabsf4( y );
qabs = _fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = copysignf4( i, q );
i = _copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
#endif
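_fmodf4 forms q = x/y, truncates |q| and also considers its two neighbours, then keeps the largest candidate i with |x| - i*|y| >= 0, tested in the 2|x| - i*|y| >= |x| form so the fused negative-multiply-subtract cannot round the wrong way; the remainder is x - i*y with i given the sign of q. A scalar sketch of the candidate selection, using truncf/copysignf from <math.h> in place of the vector conversions:

#include <stdio.h>
#include <math.h>

/* fmod via candidate quotients, following the _fmodf4 comment:
   i is trunc(|x/y|) or one of its neighbours, chosen as the
   largest value with 2|x| - i*|y| >= |x|. */
static float fmod_candidates(float x, float y)
{
    float q     = x / y;
    float xabs  = fabsf(x), yabs = fabsf(y);
    float xabs2 = xabs + xabs;
    float i1 = truncf(fabsf(q));
    float i0 = i1 - 1.0f, i2 = i1 + 1.0f;
    float i  = i0;
    if (xabs2 - i1 * yabs >= xabs) i = i1;
    if (xabs2 - i2 * yabs >= xabs) i = i2;
    i = copysignf(i, q);            /* give i the sign of the quotient */
    return x - i * y;               /* spu_nmsub(i, y, x) in the SPU version */
}

int main(void)
{
    printf("%f vs %f\n", fmod_candidates(7.5f, 2.0f), fmodf(7.5f, 2.0f));
    printf("%f vs %f\n", fmod_candidates(-7.5f, 2.0f), fmodf(-7.5f, 2.0f));
    return 0;
}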

View File

@@ -0,0 +1,83 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYD2_H___
#define ___SIMD_MATH_FPCLASSIFYD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector signed long long
_fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
#endif
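All of the classification comes down to two bit fields: an all-ones exponent means NaN or infinity depending on the mantissa, a zero exponent means zero or subnormal, and everything else is normal; the vector code simply evaluates those tests with shuffled 32-bit compares. The same decision tree on a scalar double:

#include <stdio.h>
#include <string.h>
#include <math.h>

/* Classify a double from its bit pattern, the same field tests
   _fpclassifyd2 performs with vector compares. */
static int classify_bits(double x)
{
    unsigned long long u;
    memcpy(&u, &x, sizeof u);
    unsigned long long expn = (u >> 52) & 0x7ff;
    unsigned long long mant = u & 0x000fffffffffffffull;
    if (expn == 0x7ff) return mant ? FP_NAN : FP_INFINITE;
    if (expn == 0)     return mant ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}

int main(void)
{
    printf("%d %d %d\n",
           classify_bits(1.0)    == FP_NORMAL,
           classify_bits(0.0)    == FP_ZERO,
           classify_bits(1e-310) == FP_SUBNORMAL);   /* all print 1 */
    return 0;
}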

View File

@@ -27,52 +27,41 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYF4_H___
#define ___SIMD_MATH_FPCLASSIFYF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed int
fpclassifyf4 (vector float x)
static inline vector signed int
_fpclassifyf4 (vector float x)
{
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
return classtype;
return classtype;
}
#endif

View File

@@ -0,0 +1,98 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPD2_H___
#define ___SIMD_MATH_FREXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#define __FREXPD_DBL_NAN 0x7FF8000000000000ull
static inline vector double
_frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats(__FREXPD_DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
#endif

View File

@@ -27,21 +27,26 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPF4_H___
#define ___SIMD_MATH_FREXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
frexpf4 (vector float x, vector signed int *pexp)
static inline vector float
_frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, expmask);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
return spu_sel(m2, (vec_float4)zeros, zeromask);
}
#endif
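_frexpf4 reads the 8-bit exponent field, rebiases it so the returned fraction falls in [0.5, 1), and produces that fraction by overwriting the exponent bits with 0x3F000000, the encoding of 0.5; zero inputs return zero for both outputs (denormals are flushed on SPU). A scalar sketch of the same exponent-field surgery, with specials ignored:

#include <stdio.h>
#include <string.h>

/* frexp for normal floats by exponent-field manipulation, as in
   _frexpf4.  Subnormals, infinities and NaNs are not handled here. */
static float frexp_bits(float x, int *e)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    if ((u & 0x7fffffffu) == 0) { *e = 0; return 0.0f; }   /* +-0 */
    *e = (int)((u >> 23) & 0xff) - 126;                    /* unbiased exponent + 1 */
    u = (u & 0x807fffffu) | 0x3f000000u;                   /* force the exponent of 0.5 */
    memcpy(&x, &u, sizeof x);
    return x;
}

int main(void)
{
    int e;
    float m = frexp_bits(12.0f, &e);
    printf("%f * 2^%d\n", m, e);   /* 0.750000 * 2^4 */
    return 0;
}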

View File

@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTD2_H___
#define ___SIMD_MATH_HYPOTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
hypotd2 (vector double x, vector double y)
#include <simdmath/sqrtd2.h>
static inline vector double
_hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtd2(sum);
return _sqrtd2(sum);
}
#endif

View File

@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTF4_H___
#define ___SIMD_MATH_HYPOTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
hypotf4 (vector float x, vector float y)
#include <simdmath/sqrtf4.h>
static inline vector float
_hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtf4(sum);
return _sqrtf4(sum);
}
#endif

View File

@@ -0,0 +1,83 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBD2_H___
#define ___SIMD_MATH_ILOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
static inline vector signed long long
_ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
#endif

View File

@@ -27,22 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBF4_H___
#define ___SIMD_MATH_ILOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
vector signed int
ilogbf4 (vector float x)
static inline vector signed int
_ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_uint4 zeromask = spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);
return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}
#endif
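_ilogbf4 masks out the exponent field, shifts it down and subtracts the bias of 127; a zero field (a zero input, or a denormal, which the SPU flushes) returns FP_ILOGB0. A scalar equivalent:

#include <stdio.h>
#include <string.h>
#include <math.h>

/* ilogb for normal floats by reading the exponent field, as _ilogbf4 does. */
static int ilogb_bits(float x)
{
    unsigned u;
    memcpy(&u, &x, sizeof u);
    unsigned e = u & 0x7f800000u;
    if (e == 0) return FP_ILOGB0;        /* zero (or flushed denormal) */
    return (int)(e >> 23) - 127;
}

int main(void)
{
    printf("%d %d %d\n", ilogb_bits(1.0f), ilogb_bits(12.0f), ilogb_bits(0.25f));
    /* 0 3 -2 */
    return 0;
}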

View File

@@ -30,10 +30,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IRINTF4_H___
#define ___SIMD_MATH_IRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int irintf4(vector float in)
static inline vector signed int
_irintf4(vector float in)
{
return spu_convts(in,0);
}
#endif

View File

@@ -29,10 +29,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IROUNDF4_H___
#define ___SIMD_MATH_IROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int iroundf4(vector float in)
static inline vector signed int
_iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
@@ -53,3 +57,5 @@ vector signed int iroundf4(vector float in)
return (out);
}
#endif

View File

@@ -27,20 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMD2_H___
#define ___SIMD_MATH_IS0DENORMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
is0denormd2 (vector double x)
static inline vector unsigned long long
_is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
return cmp;
return cmp;
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMF4_H___
#define ___SIMD_MATH_IS0DENORMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
is0denormf4 (vector float x)
static inline vector unsigned int
_is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}
#endif

View File

@@ -27,28 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALD2_H___
#define ___SIMD_MATH_ISEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( isnand2( x ), isnand2( y ) ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( _isnand2( x ), _isnand2( y ) ) );
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALF4_H___
#define ___SIMD_MATH_ISEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
return spu_cmpeq(x, y);
}
#endif

View File

@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITED2_H___
#define ___SIMD_MATH_ISFINITED2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isfinited2 (vector double x)
static inline vector unsigned long long
_isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Finite unless NaN or Inf, check for 'not all-ones exponent'
//Finite unless NaN or Inf, check for 'not all-ones exponent'
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
return cmpr;
}
#endif

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITEF4_H___
#define ___SIMD_MATH_ISFINITEF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isfinitef4 (vector float x)
static inline vector unsigned int
_isfinitef4 (vector float x)
{
(void)x;
(void)x;
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}
#endif

View File

@@ -27,39 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERD2_H___
#define ___SIMD_MATH_ISGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif
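The doubleword compare is stitched together from 32-bit compares: a signed compare of the high words, an unsigned compare of the low words when the high words tie, a flip of the result when both operands are negative, a bothzero mask so +0 and -0 compare equal, and NaN operands forcing a false result. The same ordering rules applied to scalar bit patterns (isnan from <math.h> replaces _isnand2):

#include <stdio.h>
#include <string.h>
#include <math.h>

/* Ordered x > y done on the raw double bit patterns, the idea behind
   _isgreaterd2: integer compares, a flipped sense when both operands
   are negative, +0 == -0, and NaN forcing a false result. */
static int isgreater_bits(double x, double y)
{
    unsigned long long ux, uy;
    memcpy(&ux, &x, sizeof ux);
    memcpy(&uy, &y, sizeof uy);
    if (isnan(x) || isnan(y)) return 0;                       /* unordered */
    if (((ux | uy) & 0x7fffffffffffffffull) == 0) return 0;   /* +0 vs -0 */
    int sx = (int)(ux >> 63), sy = (int)(uy >> 63);
    if (sx != sy) return sy;       /* the non-negative operand is greater */
    if (sx)       return ux < uy;  /* both negative: bit order is reversed */
    return ux > uy;                /* both non-negative: bit order matches */
}

int main(void)
{
    printf("%d %d %d %d\n",
           isgreater_bits(2.0, 1.0),     /* 1 */
           isgreater_bits(-1.0, -2.0),   /* 1 */
           isgreater_bits(-0.0, 0.0),    /* 0 */
           isgreater_bits(1.0, 2.0));    /* 0 */
    return 0;
}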

View File

@@ -27,41 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALD2_H___
#define ___SIMD_MATH_ISGREATEREQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALF4_H___
#define ___SIMD_MATH_ISGREATEREQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;
vec_uint4 var;
var = spu_cmpgt(y, x);
var = spu_cmpgt(y, x);
return spu_nor(var, var);
return spu_nor(var, var);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERF4_H___
#define ___SIMD_MATH_ISGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
return spu_cmpgt(x, y);
}
#endif

View File

@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFD2_H___
#define ___SIMD_MATH_ISINFD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isinfd2 (vector double x)
static inline vector unsigned long long
_isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
return cmp;
return cmp;
}
#endif

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFF4_H___
#define ___SIMD_MATH_ISINFF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isinff4 (vector float x)
static inline vector unsigned int
_isinff4 (vector float x)
{
(void)x;
// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -27,38 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSD2_H___
#define ___SIMD_MATH_ISLESSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif
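
The islessd2 code above builds a 64-bit double comparison out of the SPU's 32-bit integer compares, then patches the two cases where raw bit-pattern ordering and floating-point ordering disagree: two zeros of either sign compare equal, and two negative values compare in reverse. For reference, a scalar sketch of the predicate each slot ends up with (hypothetical helper name, illustration only):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the per-slot predicate: x < y, false when either operand
   is NaN, with -0.0 treated as equal to +0.0 (illustration only). */
static uint64_t isless_scalar(double x, double y)
{
    int64_t ix, iy;
    memcpy(&ix, &x, sizeof ix);
    memcpy(&iy, &y, sizeof iy);
    if (x != x || y != y)                 /* unordered: NaN never compares less */
        return 0ULL;
    if (((ix | iy) & INT64_MAX) == 0)     /* both zero, regardless of sign */
        return 0ULL;
    if (ix < 0 && iy < 0)                 /* both negative: bit-pattern order is reversed */
        return (iy < ix) ? ~0ULL : 0ULL;
    return (ix < iy) ? ~0ULL : 0ULL;      /* otherwise signed integer order matches */
}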

View File

@@ -27,40 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALD2_H___
#define ___SIMD_MATH_ISLESSEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALF4_H___
#define ___SIMD_MATH_ISLESSEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessequalf4 (vector float x, vector float y)
static inline vector unsigned int
_islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(x, y);
return spu_nor(var, var);
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSF4_H___
#define ___SIMD_MATH_ISLESSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessf4 (vector float x, vector float y)
static inline vector unsigned int
_islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}
#endif

View File

@@ -27,29 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERD2_H___
#define ___SIMD_MATH_ISLESSGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERF4_H___
#define ___SIMD_MATH_ISLESSGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpeq(x, y);
return spu_nor(var, var);
}
#endif

View File

@@ -27,26 +27,30 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNAND2_H___
#define ___SIMD_MATH_ISNAND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnand2 (vector double x)
static inline vector unsigned long long
_isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);
xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );
cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return cmpnan;
}
#endif
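
The isnand2 code exploits the IEEE-754 layout: once the sign bit is forced on, a double is NaN exactly when its bit pattern is strictly greater than 0xfff0000000000000 (the pattern of negative infinity); the even/odd shuffles merely rebuild that 64-bit unsigned compare from the SPU's 32-bit compare results. A scalar sketch of the same test (hypothetical helper, illustration only):

#include <stdint.h>
#include <string.h>

static uint64_t isnan_scalar(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    bits |= 0x8000000000000000ull;                 /* force the sign bit on */
    /* NaN: all-ones exponent and non-zero mantissa, i.e. above -infinity's pattern */
    return (bits > 0xfff0000000000000ull) ? ~0ULL : 0ULL;
}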

View File

@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNANF4_H___
#define ___SIMD_MATH_ISNANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnanf4 (vector float x)
static inline vector unsigned int
_isnanf4 (vector float x)
{
(void)x;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -27,23 +27,27 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALD2_H___
#define ___SIMD_MATH_ISNORMALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnormald2 (vector double x)
static inline vector unsigned long long
_isnormald2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Normal unless nan, infinite, denorm, or zero
//Check for 'not zero or all-ones exponent'
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
#endif
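
As the comments above say, a double is normal unless it is NaN, infinite, denormal, or zero, which reduces to one test on the exponent field: it must be neither zero nor all ones. Scalar sketch (hypothetical helper, illustration only):

#include <stdint.h>
#include <string.h>

static uint64_t isnormal_scalar(double x)
{
    uint64_t bits, expfield;
    memcpy(&bits, &x, sizeof bits);
    expfield = bits & 0x7ff0000000000000ull;        /* biased exponent field */
    /* zero exponent -> zero/denormal, all-ones exponent -> infinity/NaN */
    return (expfield != 0 && expfield != 0x7ff0000000000000ull) ? ~0ULL : 0ULL;
}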

View File

@@ -27,12 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALF4_H___
#define ___SIMD_MATH_ISNORMALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnormalf4 (vector float x)
static inline vector unsigned int
_isnormalf4 (vector float x)
{
// NaN, INF not supported on SPU; normal unless zero
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
}
#endif

View File

@@ -27,37 +27,41 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDD2_H___
#define ___SIMD_MATH_ISUNORDEREDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isunorderedd2 (vector double x, vector double y)
static inline vector unsigned long long
_isunorderedd2 (vector double x, vector double y)
{
vec_double2 neg;
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
//Check if x is nan
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
//Check if y is nan
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return spu_or( cmpnanx, cmpnany );
}
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDF4_H___
#define ___SIMD_MATH_ISUNORDEREDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isunorderedf4 (vector float x, vector float y)
static inline vector unsigned int
_isunorderedf4 (vector float x, vector float y)
{
(void)x;
(void)y;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -29,17 +29,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPD2_H___
#define ___SIMD_MATH_LDEXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ldexpd2(vector double x, vector signed long long ex)
static inline vector double
_ldexpd2(vector double x, vector signed long long ex)
{
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2099);
// vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats( 2098);
// vec_int4 max = spu_splats( 2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
vec_double2 f1, f2;
@@ -83,7 +86,7 @@ ldexpd2(vector double x, vector signed long long ex)
/* Compute the product x * 2^e1 * 2^e2
*/
// out = spu_mul(spu_mul(x, f1), f2);
// check floating point register DENORM bit
vec_uint4 fpscr0, fpscr;
@@ -159,7 +162,7 @@ ldexpd2(vector double x, vector signed long long ex)
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
minmask = spu_shuffle(minmask, minmask, splat_msb);
@@ -245,9 +248,9 @@ ldexpd2(vector double x, vector signed long long ex)
vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
// select right answer
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
x = spu_sel(x, (vec_double2)zeros, minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask);
x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask);
out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
@@ -260,4 +263,4 @@ ldexpd2(vector double x, vector signed long long ex)
return out;
}
#endif
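
The fragment above scales by computing x * 2^e1 * 2^e2 rather than x * 2^e in one step: the exponent is split so that each partial factor stays representable even when 2^e itself would overflow or underflow a double (the full routine additionally clamps the exponent to [-2099, 2098] and patches denormal results via the FPSCR DENORM bit). A scalar sketch of just the splitting idea, assuming |e| stays within roughly +/-2046:

#include <math.h>

static double ldexp_twostep(double x, int e)
{
    int e1 = e / 2;          /* each half is at most ~1023 in magnitude ... */
    int e2 = e - e1;         /* ... so 2^e1 and 2^e2 are both finite doubles */
    return x * exp2((double)e1) * exp2((double)e2);
}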

View File

@@ -27,26 +27,30 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPF4_H___
#define ___SIMD_MATH_LDEXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ldexpf4 (vector float x, vector signed int exp)
static inline vector float
_ldexpf4 (vector float x, vector signed int exp)
{
vec_int4 zeros = spu_splats(0);
vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_rlmask(e1,-23);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros));
vec_uint4 maxmask = spu_cmpgt(exp, 255);
vec_uint4 minmask = spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, spu_cmpeq(x, (vec_float4)zeros));
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
maxmask = spu_or (maxmask, spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, spu_splats(0x7FFFFFFFU));
minmask = spu_or (minmask, spu_cmpgt(zeros, esum));
x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
x = spu_sel(x, (vec_float4)zeros, minmask);
@@ -54,3 +58,5 @@ ldexpf4 (vector float x, vector signed int exp)
x = spu_sel(x, (vec_float4)maxmask, maxmask);
return x;
}
#endif

View File

@@ -27,11 +27,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLABSI2_H___
#define ___SIMD_MATH_LLABSI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
llabsi2 (vector signed long long in)
static inline vector signed long long
_llabsi2 (vector signed long long in)
{
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
@@ -43,3 +46,5 @@ llabsi2 (vector signed long long in)
return ((vec_llong2)(res));
}
#endif
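
llabsi2 builds a branch-free 64-bit absolute value: the shown lines broadcast the sign bit across each doubleword, and the elided lines fold it into the value. The usual scalar identity behind this kind of code (hypothetical helper, illustration only, not necessarily the exact formulation used here):

#include <stdint.h>

static int64_t llabs_branchfree(int64_t x)
{
    int64_t mask = x >> 63;        /* 0 when x >= 0, all-ones when x < 0 (arithmetic shift assumed) */
    return (x ^ mask) - mask;      /* identity for x >= 0, two's complement otherwise */
}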

View File

@@ -0,0 +1,85 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVI2_H___
#define ___SIMD_MATH_LLDIVI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_lldiv.h>
#include <simdmath/lldivu2.h>
static inline vector signed long long
__lldivi2_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline lldivi2_t
_lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
lldivu2_t resAbs;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(denom), denom, (vec_ullong2)denomPos);
// Divide the absolute values with the unsigned routine.
resAbs = _lldivu2(numerAbs, denomAbs);
res.quot = spu_sel((vec_llong2)resAbs.quot, __lldivi2_negatell2((vec_llong2)resAbs.quot),
(vec_ullong2)quotNeg);
res.rem = spu_sel(__lldivi2_negatell2((vec_llong2)resAbs.rem), (vec_llong2)resAbs.rem,
(vec_ullong2)numerPos);
return res;
}
#endif
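
A minimal usage sketch for the new header, assuming an SPU toolchain with these headers installed; the union is only there to read the two 64-bit slots back out portably, and the values are arbitrary:

#include <simdmath/lldivi2.h>
#include <stdio.h>

int main(void)
{
    union { vector signed long long v; long long s[2]; } num, den, q, r;
    num.s[0] = 100;  num.s[1] = -7;
    den.s[0] = 9;    den.s[1] = 2;

    lldivi2_t res = _lldivi2(num.v, den.v);
    q.v = res.quot;
    r.v = res.rem;

    /* expected: 100/9 -> 11 rem 1, and -7/2 -> -3 rem -1
       (quotient truncates toward zero, remainder keeps the numerator's sign) */
    printf("%lld rem %lld\n", q.s[0], r.s[0]);
    printf("%lld rem %lld\n", q.s[1], r.s[1]);
    return 0;
}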

View File

@@ -27,46 +27,51 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVU2_H___
#define ___SIMD_MATH_LLDIVU2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
#include <simdmath/_lldiv.h>
// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
static inline lldivu2_t
_lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
lldivu2_t res;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denom );
numerZeros = (vec_uint4)ll_spu_cntlz( numer );
denomZeros = (vec_uint4)__ll_spu_cntlz( denom );
numerZeros = (vec_uint4)__ll_spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) );
denomShifted = __ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), __ll_spu_cmpeq_zero( denom ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
denomLeft = __ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
do
{
cont = __ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
@@ -74,25 +79,26 @@ lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long de
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numer );
newNum = ll_spu_sub( numer, denomShifted );
skip = __ll_spu_cmpgt( denomShifted, numer );
newNum = __ll_spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
numerZeros = (vec_uint4)spu_sel( __ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
oneShifted = __ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = __ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
#endif
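
The loop above is a vectorized shift-and-subtract (restoring) division: align the denominator's leading one under the numerator's, subtract wherever it fits, and collect quotient bits on the way down; the spu_gather/spu_extract pair simply keeps both slots iterating until the slower one finishes. A scalar sketch of the same algorithm (hypothetical helper, illustration only; __builtin_clzll is the GCC builtin):

#include <stdint.h>

static void udiv_longhand(uint64_t numer, uint64_t denom,
                          uint64_t *quot, uint64_t *rem)
{
    uint64_t q = 0;
    if (denom != 0 && numer >= denom) {
        int shift = __builtin_clzll(denom) - __builtin_clzll(numer);
        uint64_t d   = denom << shift;   /* leading ones now aligned */
        uint64_t bit = 1ULL  << shift;   /* quotient bit being decided */
        while (bit != 0) {
            if (numer >= d) {            /* subtract shifted denominator when it fits */
                numer -= d;
                q |= bit;
            }
            d   >>= 1;
            bit >>= 1;
        }
    }
    *quot = q;       /* divide by zero: quotient 0 ... */
    *rem  = numer;   /* ... and remainder = numerator, as documented above */
}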

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTD2_H___
#define ___SIMD_MATH_LLRINTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
vector signed long long
llrintd2 (vector double in)
static inline vector signed long long
_llrintd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
@@ -67,8 +70,8 @@ llrintd2 (vector double in)
bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));
// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
// (vec_ullong2)spu_splats(0x8000000000000000ULL));
mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));
/* Determine how many bits to shift the mantissa to correctly
@@ -102,9 +105,11 @@ llrintd2 (vector double in)
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
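
The bias selected above (0x4330000000000000, i.e. 2^52, taken with x's sign) implements the classic add-and-subtract rounding trick: for |x| < 2^52, adding the bias leaves no fraction bits, so the addition rounds x to an integer in the current rounding mode, and subtracting the bias back recovers it; the rest of the routine then shifts the mantissa into a 64-bit integer and applies the sign. Scalar sketch of just the bias step (illustration only; build without value-changing FP optimizations):

#include <math.h>

static double round_by_bias(double x)
{
    const double two52 = 4503599627370496.0;        /* 2^52 = 0x4330000000000000 */
    if (fabs(x) >= two52)
        return x;                                   /* already integral */
    volatile double t = x + copysign(two52, x);     /* fraction rounded off here */
    return t - copysign(two52, x);
}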

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTF4_H___
#define ___SIMD_MATH_LLRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
llroundf4_t
llrintf4 (vector float in)
static inline llroundf4_t
_llrintf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
@@ -100,3 +103,5 @@ llrintf4 (vector float in)
return res;
}
#endif

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDD2_H___
#define ___SIMD_MATH_LLROUNDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
vector signed long long
llroundd2 (vector double in)
static inline vector signed long long
_llroundd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
@@ -72,7 +75,7 @@ llroundd2 (vector double in)
*/
addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend = spu_rlmask(addend, -7);
// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));
/* Compute the two's complement of the mantissa if the
@@ -84,9 +87,11 @@ llroundd2 (vector double in)
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
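
llroundd2 differs from llrintd2 in the rounding rule: llround rounds halfway cases away from zero, which the addend computed above supplies by extracting the first fraction bit (the 0.5 bit) and adding it to the shifted mantissa before the sign is applied. A scalar sketch of that rule, valid for magnitudes that fit in a long long (hypothetical helper, illustration only):

static long long llround_scalar(double x)
{
    double ax = (x < 0.0) ? -x : x;
    unsigned long long whole = (unsigned long long)ax;        /* truncated magnitude */
    unsigned long long half  = (ax - (double)whole >= 0.5);   /* the 0.5 bit */
    long long mag = (long long)(whole + half);
    return (x < 0.0) ? -mag : mag;
}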

View File

@@ -28,6 +28,9 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDF4_H___
#define ___SIMD_MATH_LLROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -35,8 +38,8 @@
// Handles no exceptions
// overflow will return unspecified data
llroundf4_t
llroundf4 (vector float in)
static inline llroundf4_t
_llroundf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
@@ -90,8 +93,8 @@ llroundf4 (vector float in)
addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend0 = spu_rlmask(addend0, -7);
addend1 = spu_rlmask(addend1, -7);
// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));
@@ -113,3 +116,5 @@ llroundf4 (vector float in)
return res;
}
#endif

View File

@@ -27,53 +27,57 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG10F4_H___
#define ___SIMD_MATH_LOG10F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#define _LOG10F_H_loga2msb ((float)0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float)0.4342944622040f)
#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f)
#define _LOG10F_H_logae ((float)0.4342944819033f)
#define __LOG10F_loga2msb 0.3010299205780f
#define __LOG10F_loga2lsb 7.5085978266e-8f
#define __LOG10F_logaemsb 0.4342944622040f
#define __LOG10F_logaelsb 1.9699272335e-8f
#define __LOG10F_logae 0.4342944819033f
#define _LOG10F_H_c0 ((float)(0.2988439998f))
#define _LOG10F_H_c1 ((float)(0.3997655209f))
#define _LOG10F_H_c2 ((float)(0.6666679125f))
#define __LOG10F_c0 0.2988439998f
#define __LOG10F_c1 0.3997655209f
#define __LOG10F_c2 0.6666679125f
vector float
log10f4 (vector float x)
static inline vector float
_log10f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 z = _divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2));
polyw = spu_madd(spu_splats(__LOG10F_c0), w, spu_splats(__LOG10F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG10F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1,
spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1,
spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew,
spu_mul(spu_splats(_LOG10F_H_logae), yneg))
vec_float4 zz1 = spu_madd(spu_splats(__LOG10F_logaemsb), x1,
spu_mul(spu_splats(__LOG10F_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(__LOG10F_logaelsb), x1,
spu_madd(spu_splats(__LOG10F_loga2lsb), wnew,
spu_mul(spu_splats(__LOG10F_logae), yneg))
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif
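
The constants above encode the identity the routine evaluates: with x reduced to m * 2^e and m kept in [sqrt(0.5), sqrt(2)), log10(x) = e*log10(2) + log10(e)*ln(m), where ln(m) comes from the polynomial in z = (m-1)/(m+1); splitting log10(2) and log10(e) into msb/lsb pairs recovers a little extra precision in the final sum. A scalar sketch of the decomposition, using logf in place of the polynomial (illustration only):

#include <math.h>

static float log10_decomposed(float x)
{
    int e;
    float m = frexpf(x, &e);      /* x = m * 2^e with m in [0.5, 1) */
    if (m < 0.7071067811865f) {   /* renormalize m into [sqrt(0.5), sqrt(2)) */
        m *= 2.0f;
        e -= 1;
    }
    return (float)e * 0.30102999566f     /* e * log10(2) */
         + 0.43429448190f * logf(m);     /* log10(e) * ln(m) */
}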

View File

@@ -27,25 +27,34 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG1PF4_H___
#define ___SIMD_MATH_LOG1PF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
log1pf4 (vector float x)
#include <simdmath/logf4.h>
#include <simdmath/divf4.h>
static inline vector float
_log1pf4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f));
d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f));
d0 = spu_madd(x , spu_splats(1.5934420741f), spu_splats(0.8952856868f));
d1 = spu_madd(x , spu_splats(0.1198195734f), spu_splats(0.8377145063f));
d1 = spu_madd(x2, d1, d0);
n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f));
n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f));
n0 = spu_madd(x , spu_splats(1.1457993413f), spu_splats(0.8952856678f));
n1 = spu_madd(x , spu_splats(0.0082862580f), spu_splats(0.3394238808f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, divf4(n1, d1)),
return spu_sel(_logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
#endif

View File

@@ -27,45 +27,52 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG2F4_H___
#define ___SIMD_MATH_LOG2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG2F_H_l2emsb ((float)1.4426950216293f)
#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f)
#define _LOG2F_H_l2e ((float)1.4426950408890f)
#include <simdmath/divf4.h>
#define _LOG2F_H_c0 ((float)(0.2988439998f))
#define _LOG2F_H_c1 ((float)(0.3997655209f))
#define _LOG2F_H_c2 ((float)(0.6666679125f))
#define __LOG2F_l2emsb 1.4426950216293f
#define __LOG2F_l2elsb 1.9259629911e-8f
#define __LOG2F_l2e 1.4426950408890f
vector float
log2f4 (vector float x)
#define __LOG2F_c0 0.2988439998f
#define __LOG2F_c1 0.3997655209f
#define __LOG2F_c2 0.6666679125f
static inline vector float
_log2f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uint4)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4(x1, spu_add(x, ones));
vec_float4 z = _divf4(x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2));
polyw = spu_madd(spu_splats(__LOG2F_c0), w, spu_splats(__LOG2F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG2F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1,
spu_mul(spu_splats(_LOG2F_H_l2e), yneg)
vec_float4 zz1 = spu_madd(spu_splats(__LOG2F_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(__LOG2F_l2elsb), x1,
spu_mul(spu_splats(__LOG2F_l2e), yneg)
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif

View File

@@ -0,0 +1,86 @@
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBD2_H___
#define ___SIMD_MATH_LOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector double
_logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats(0x7FF8000000000000ll), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(__builtin_huge_val()), isinf );
//ZERO: x is zero (zero exponent and zero mantissa), return -HUGE_VAL
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(-__builtin_huge_val()), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
#endif
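
The three selects above implement the special cases spelled out in the comments (NaN stays NaN, infinities give HUGE_VAL, zero gives -HUGE_VAL), and the leading-zero arithmetic makes subnormals report the exponent their normalized value would have. A scalar sketch of the same case split (hypothetical helper, illustration only):

#include <math.h>
#include <stdint.h>
#include <string.h>

static double logb_scalar(double x)
{
    uint64_t bits, mag;
    memcpy(&bits, &x, sizeof bits);
    mag = bits & 0x7fffffffffffffffull;
    int biased = (int)(mag >> 52);

    if (biased == 0x7ff)                                       /* all-ones exponent */
        return (mag > 0x7ff0000000000000ull) ? x : HUGE_VAL;   /* NaN stays NaN, +/-inf -> +inf */
    if (mag == 0)
        return -HUGE_VAL;                                      /* +/-0 */
    if (biased == 0) {                                         /* subnormal: normalize first */
        int extra = 0;
        while (!(mag & 0x0010000000000000ull)) { mag <<= 1; ++extra; }
        return (double)(-1022 - extra);
    }
    return (double)(biased - 1023);                            /* normal: unbiased exponent */
}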

View File

@@ -27,20 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBF4_H___
#define ___SIMD_MATH_LOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALF
#define HUGE_VALF __builtin_huge_valf ()
#endif
vector float
logbf4 (vector float x)
static inline vector float
_logbf4 (vector float x)
{
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_uint4 zeromask = spu_cmpeq(e1, 0);
e1 = spu_sub(e1, spu_splats((int)0x3F800000));
return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}
#endif

Some files were not shown because too many files have changed in this diff.