added updated version of simdmathlibrary-1.0.1
This commit is contained in:
@@ -30,11 +30,12 @@
|
||||
# All that you do to add a file is edit OBJS, the rest will just work
|
||||
|
||||
prefix = /usr
|
||||
prefix_spu = $(prefix)/spu
|
||||
DESTDIR =
|
||||
|
||||
OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \
|
||||
is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \
|
||||
sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
|
||||
sinf4.o isgreaterd2.o sind2.o sincosd2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
|
||||
rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \
|
||||
negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \
|
||||
cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \
|
||||
@@ -51,7 +52,7 @@ OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o
|
||||
fmodd2.o remainderd2.o
|
||||
|
||||
|
||||
INCLUDES_SPU = -I../
|
||||
INCLUDES_SPU = -I. -I../common
|
||||
|
||||
CROSS_SPU = spu-
|
||||
AR_SPU = $(CROSS_SPU)ar
|
||||
@@ -66,6 +67,7 @@ INSTALL = install
|
||||
|
||||
MAKE_DEFS = \
|
||||
prefix='$(prefix)' \
|
||||
prefix_spu='$(prefix_spu)' \
|
||||
DESTDIR='$(DESTDIR)' \
|
||||
LIB_BASE='$(LIB_BASE)' \
|
||||
LIB_NAME='$(LIB_NAME)' \
|
||||
@@ -89,43 +91,28 @@ $(STATIC_LIB): $(OBJS)
|
||||
$(RANLIB_SPU) $@
|
||||
|
||||
install: $(STATIC_LIB)
|
||||
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include
|
||||
$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/
|
||||
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib
|
||||
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB)
|
||||
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include
|
||||
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/include/simdmath
|
||||
$(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_spu)/include/simdmath/
|
||||
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_spu)/lib
|
||||
$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix_spu)/lib/$(STATIC_LIB)
|
||||
|
||||
clean:
|
||||
cd tests; $(MAKE) $(MAKE_DEFS) clean
|
||||
rm -f $(OBJS)
|
||||
rm -f $(STATIC_LIB)
|
||||
|
||||
$(OBJS): ../simdmath.h
|
||||
$(OBJS): ../common/simdmath.h
|
||||
|
||||
check: $(STATIC_LIB)
|
||||
cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check
|
||||
|
||||
|
||||
# Some Objects have special header files.
|
||||
sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h
|
||||
lldivu2.o lldivi2.o : lldiv.h
|
||||
sinf4.o sind2.o sincosf4.o cosd2.o: ../common/simdmath/_sincos.h
|
||||
lldivu2.o lldivi2.o : simdmath/_lldiv.h
|
||||
|
||||
|
||||
|
||||
%.o: %.c
|
||||
%.o: ../common/%.c simdmath/%.h
|
||||
$(CC_SPU) $(CFLAGS_SPU) -c $<
|
||||
|
||||
#----------
|
||||
# C++
|
||||
#----------
|
||||
%.o: %.C
|
||||
$(CXX_SPU) $(CFLAGS_SPU) -c $<
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX_SPU) $(CFLAGS_SPU) -c $<
|
||||
|
||||
%.o: %.cc
|
||||
$(CXX_SPU) $(CFLAGS_SPU) -c $<
|
||||
|
||||
%.o: %.cxx
|
||||
$(CXX_SPU) $(CFLAGS_SPU) -c $<
|
||||
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
/* cosd2 - Computes the cosine of the each of two double slots.
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
#include "sincos_c.h"
|
||||
|
||||
vector double
|
||||
cosd2 (vector double x)
|
||||
{
|
||||
vec_double2 xl,xl2,xl3,res;
|
||||
vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
|
||||
vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
|
||||
|
||||
// Range reduction using : xl = angle * TwoOverPi;
|
||||
//
|
||||
xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
|
||||
|
||||
// Find the quadrant the angle falls in
|
||||
// using: q = (int) (ceil(abs(x))*sign(x))
|
||||
//
|
||||
xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
|
||||
vec_float4 xf = spu_roundtf(xl);
|
||||
vec_int4 q = spu_convts(xf,0);
|
||||
q = spu_shuffle(q,q,copyEven);
|
||||
|
||||
|
||||
// Compute an offset based on the quadrant that the angle falls in
|
||||
//
|
||||
vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3)));
|
||||
|
||||
// Remainder in range [-pi/4..pi/4]
|
||||
//
|
||||
vec_float4 qf = spu_convtf(q,0);
|
||||
vec_double2 qd = spu_extend(qf);
|
||||
vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
|
||||
xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
|
||||
|
||||
// Check if |xl| is a really small number
|
||||
//
|
||||
vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
|
||||
vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl);
|
||||
|
||||
// Compute x^2 and x^3
|
||||
//
|
||||
xl2 = spu_mul(xl,xl);
|
||||
xl3 = spu_mul(xl2,xl);
|
||||
|
||||
// Compute both the sin and cos of the angles
|
||||
// using a polynomial expression:
|
||||
// cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
|
||||
// sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
|
||||
//
|
||||
|
||||
vec_double2 ct0 = spu_mul(xl2,xl2);
|
||||
vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
|
||||
vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
|
||||
vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
|
||||
vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
|
||||
vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
|
||||
vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
|
||||
vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
|
||||
vec_double2 st4 = spu_madd(st2,ct0,st3);
|
||||
vec_double2 ct5 = spu_mul(ct0,ct0);
|
||||
|
||||
vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
|
||||
vec_double2 st6 = spu_madd(ct5,st1,st4);
|
||||
|
||||
vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
|
||||
vec_double2 sx = spu_madd(st6,xl3,xl);
|
||||
|
||||
// Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
|
||||
//
|
||||
sx = spu_sel(sx,xl,isTiny);
|
||||
cx = spu_sel(cx,spu_splats(1.0),isTiny);
|
||||
|
||||
// Use the cosine when the offset is odd and the sin
|
||||
// when the offset is even
|
||||
//
|
||||
vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
|
||||
res = spu_sel(cx,sx,mask1);
|
||||
|
||||
// Flip the sign of the result when (offset mod 4) = 1 or 2
|
||||
//
|
||||
vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
|
||||
mask2 = spu_shuffle(mask2,mask2,copyEven);
|
||||
res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
|
||||
// if input = +/-Inf return NAN
|
||||
//
|
||||
res = spu_sel(res, nan, isnand2 (x));
|
||||
|
||||
// if input = 0 or denorm return or 1.0
|
||||
//
|
||||
vec_ullong2 zeroMask = is0denormd2 (x);
|
||||
res = spu_sel(res,spu_splats(1.0),zeroMask);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include "sincos_c.h"
|
||||
|
||||
vector float
|
||||
cosf4 (vector float x)
|
||||
{
|
||||
vec_float4 xl,xl2,xl3,res;
|
||||
vec_int4 q;
|
||||
|
||||
// Range reduction using : xl = angle * TwoOverPi;
|
||||
//
|
||||
xl = spu_mul(x, spu_splats(0.63661977236f));
|
||||
|
||||
// Find the quadrant the angle falls in
|
||||
// using: q = (int) (ceil(abs(xl))*sign(xl))
|
||||
//
|
||||
xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
|
||||
q = spu_convts(xl,0);
|
||||
|
||||
|
||||
// Compute an offset based on the quadrant that the angle falls in
|
||||
//
|
||||
vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3)));
|
||||
|
||||
// Remainder in range [-pi/4..pi/4]
|
||||
//
|
||||
vec_float4 qf = spu_convtf(q,0);
|
||||
vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
|
||||
xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
|
||||
|
||||
// Compute x^2 and x^3
|
||||
//
|
||||
xl2 = spu_mul(xl,xl);
|
||||
xl3 = spu_mul(xl2,xl);
|
||||
|
||||
|
||||
// Compute both the sin and cos of the angles
|
||||
// using a polynomial expression:
|
||||
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
|
||||
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
|
||||
//
|
||||
vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
|
||||
vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
|
||||
|
||||
vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
|
||||
vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
|
||||
|
||||
vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
|
||||
vec_float4 sx = spu_madd(st2,xl3,xl);
|
||||
|
||||
// Use the cosine when the offset is odd and the sin
|
||||
// when the offset is even
|
||||
//
|
||||
vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
|
||||
res = spu_sel(cx,sx,mask1);
|
||||
|
||||
// Flip the sign of the result when (offset mod 4) = 1 or 2
|
||||
//
|
||||
vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
|
||||
res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
|
||||
|
||||
return res;
|
||||
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
/* divi4 -
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
|
||||
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
|
||||
|
||||
divi4_t divi4 (vector signed int numer, vector signed int denom)
|
||||
{
|
||||
divi4_t res;
|
||||
vec_int4 quot, newQuot, shift;
|
||||
vec_uint4 numerPos, denomPos, quotNeg;
|
||||
vec_uint4 numerAbs, denomAbs;
|
||||
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
|
||||
vec_uint4 newNum, skip, cont;
|
||||
int anyCont;
|
||||
|
||||
// Determine whether result needs sign change
|
||||
|
||||
numerPos = spu_cmpgt( numer, -1 );
|
||||
denomPos = spu_cmpgt( denom, -1 );
|
||||
quotNeg = spu_xor( numerPos, denomPos );
|
||||
|
||||
// Use absolute values of numerator, denominator
|
||||
|
||||
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
|
||||
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
|
||||
|
||||
// Get difference of leading zeros.
|
||||
// Any possible negative value will be interpreted as a shift > 31
|
||||
|
||||
denomZeros = spu_cntlz( denomAbs );
|
||||
numerZeros = spu_cntlz( numerAbs );
|
||||
|
||||
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
|
||||
|
||||
// Shift denom to align leading one with numerator's
|
||||
|
||||
denomShifted = spu_sl( denomAbs, (vec_uint4)shift );
|
||||
oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift );
|
||||
oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) );
|
||||
|
||||
// Shift left all leading zeros.
|
||||
|
||||
denomLeft = spu_sl( denomAbs, denomZeros );
|
||||
oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros );
|
||||
|
||||
quot = spu_splats(0);
|
||||
|
||||
do
|
||||
{
|
||||
cont = spu_cmpgt( oneShifted, 0U );
|
||||
anyCont = spu_extract( spu_gather( cont ), 0 );
|
||||
|
||||
newQuot = spu_or( quot, (vec_int4)oneShifted );
|
||||
|
||||
// Subtract shifted denominator from remaining numerator
|
||||
// when denominator is not greater.
|
||||
|
||||
skip = spu_cmpgt( denomShifted, numerAbs );
|
||||
newNum = spu_sub( numerAbs, denomShifted );
|
||||
|
||||
// If denominator is greater, next shift is one more, otherwise
|
||||
// next shift is number of leading zeros of remaining numerator.
|
||||
|
||||
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
|
||||
shift = (vec_int4)spu_sub( skip, numerZeros );
|
||||
|
||||
oneShifted = spu_rlmask( oneLeft, shift );
|
||||
denomShifted = spu_rlmask( denomLeft, shift );
|
||||
|
||||
quot = spu_sel( newQuot, quot, skip );
|
||||
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
|
||||
}
|
||||
while ( anyCont );
|
||||
|
||||
res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg );
|
||||
res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos );
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef FP_NAN
|
||||
#define FP_NAN (0)
|
||||
#endif
|
||||
#ifndef FP_INFINITE
|
||||
#define FP_INFINITE (1)
|
||||
#endif
|
||||
#ifndef FP_ZERO
|
||||
#define FP_ZERO (2)
|
||||
#endif
|
||||
#ifndef FP_SUBNORMAL
|
||||
#define FP_SUBNORMAL (3)
|
||||
#endif
|
||||
#ifndef FP_NORMAL
|
||||
#define FP_NORMAL (4)
|
||||
#endif
|
||||
|
||||
vector signed long long
|
||||
fpclassifyd2 (vector double x)
|
||||
{
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
|
||||
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
|
||||
|
||||
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
|
||||
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
|
||||
|
||||
vec_ullong2 mask;
|
||||
vec_llong2 classtype;
|
||||
vec_uint4 cmpgt, cmpeq;
|
||||
|
||||
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
|
||||
classtype = spu_splats((long long)FP_NORMAL);
|
||||
|
||||
//FP_NAN: all-ones exponent and non-zero mantissa
|
||||
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
|
||||
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_shuffle( cmpgt, cmpgt, odd ) ) );
|
||||
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
|
||||
|
||||
//FP_INFINITE: all-ones exponent and zero mantissa
|
||||
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
|
||||
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
|
||||
|
||||
//FP_ZERO: zero exponent and zero mantissa
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
|
||||
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
|
||||
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
|
||||
|
||||
//FP_SUBNORMAL: zero exponent and non-zero mantissa
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
|
||||
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
|
||||
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
|
||||
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
|
||||
|
||||
return classtype;
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef DBL_NAN
|
||||
#define DBL_NAN ((long long)0x7FF8000000000000ull)
|
||||
#endif
|
||||
|
||||
vector double
|
||||
frexpd2 (vector double x, vector signed long long *pexp)
|
||||
{
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
|
||||
|
||||
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
|
||||
|
||||
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
|
||||
|
||||
vec_ullong2 isnan, isinf, iszero;
|
||||
vec_ullong2 e0, x0, x1;
|
||||
vec_uint4 cmpgt, cmpeq, cmpzr;
|
||||
vec_int4 lz, lz0, sh, ex;
|
||||
vec_double2 fr, frac = (vec_double2)zero;
|
||||
|
||||
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
|
||||
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
|
||||
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
|
||||
frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
|
||||
|
||||
//INF: x is infinite (all-ones exponent and zero mantissa)
|
||||
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
|
||||
frac = spu_sel( frac, x , isinf );
|
||||
|
||||
//x is zero (zero exponent and zero mantissa)
|
||||
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
|
||||
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
|
||||
|
||||
frac = spu_sel( frac, (vec_double2)zero , iszero );
|
||||
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
|
||||
|
||||
//Integer Exponent: if x is normal or subnormal
|
||||
|
||||
//...shift left to normalize fraction, zero shift if normal
|
||||
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
|
||||
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
|
||||
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
|
||||
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
|
||||
|
||||
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
|
||||
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
|
||||
fr = (vec_double2)spu_sel( x1, x0, maskdw );
|
||||
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
|
||||
fr = spu_sel( fr, x, sign );
|
||||
|
||||
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
|
||||
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
|
||||
|
||||
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
|
||||
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
|
||||
|
||||
return frac;
|
||||
}
|
||||
@@ -1,84 +0,0 @@
|
||||
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef FP_ILOGB0
|
||||
#define FP_ILOGB0 ((int)0x80000001)
|
||||
#endif
|
||||
#ifndef FP_ILOGBNAN
|
||||
#define FP_ILOGBNAN ((int)0x7FFFFFFF)
|
||||
#endif
|
||||
|
||||
vector signed long long
|
||||
ilogbd2 (vector double x)
|
||||
{
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
|
||||
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
|
||||
|
||||
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
|
||||
|
||||
vec_ullong2 isnan, iszeroinf;
|
||||
vec_llong2 ilogb = (vec_llong2)zero;
|
||||
vec_llong2 e1, e2;
|
||||
vec_uint4 cmpgt, cmpeq, cmpzr;
|
||||
vec_int4 lz, lz0, lz1;
|
||||
|
||||
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
|
||||
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_shuffle( cmpgt, cmpgt, odd ) ) );
|
||||
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
|
||||
|
||||
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
|
||||
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
|
||||
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
|
||||
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
|
||||
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
|
||||
|
||||
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
|
||||
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
|
||||
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
|
||||
|
||||
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
|
||||
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
|
||||
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
|
||||
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
|
||||
|
||||
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
|
||||
|
||||
return ilogb;
|
||||
}
|
||||
@@ -1,123 +0,0 @@
|
||||
/* Common functions for lldivi2/lldivu2
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __LLDIV_H__
|
||||
#define __LLDIV_H__
|
||||
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x);
|
||||
static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count);
|
||||
static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count);
|
||||
static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x);
|
||||
static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y);
|
||||
static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y);
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_cntlz(vector unsigned long long x)
|
||||
{
|
||||
vec_uint4 cnt;
|
||||
|
||||
cnt = spu_cntlz((vec_uint4)x);
|
||||
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
|
||||
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
|
||||
|
||||
return (vec_ullong2)cnt;
|
||||
}
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
|
||||
{
|
||||
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
|
||||
vec_ullong2 x_upper, x_lower;
|
||||
|
||||
// shift upper word
|
||||
x_upper = spu_and(x, mask);
|
||||
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
|
||||
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
|
||||
|
||||
// shift lower word
|
||||
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
|
||||
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
|
||||
|
||||
return spu_sel(x_lower, x_upper, mask);
|
||||
}
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
|
||||
{
|
||||
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
|
||||
vec_ullong2 x_upper, x_lower;
|
||||
vec_uint4 cnt_byte;
|
||||
|
||||
cnt_byte = spu_add((vec_uint4)count, 7);
|
||||
|
||||
// shift upper word
|
||||
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
|
||||
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
|
||||
|
||||
// shift lower word
|
||||
x_lower = spu_andc(x, mask);
|
||||
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
|
||||
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
|
||||
|
||||
return spu_sel(x_lower, x_upper, mask);
|
||||
}
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_cmpeq_zero(vector unsigned long long x)
|
||||
{
|
||||
vec_uint4 cmp;
|
||||
|
||||
cmp = spu_cmpeq((vec_uint4)x, 0);
|
||||
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
|
||||
}
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
|
||||
{
|
||||
vec_uint4 gt;
|
||||
|
||||
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
|
||||
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
|
||||
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
|
||||
}
|
||||
|
||||
static inline vector unsigned long long
|
||||
ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
|
||||
{
|
||||
vec_uint4 borrow;
|
||||
|
||||
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
|
||||
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
|
||||
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
|
||||
}
|
||||
|
||||
#endif // __LLDIV_H__
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
/* lldivi2 -
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include "lldiv.h"
|
||||
|
||||
static inline vector signed long long _negatell2 (vector signed long long x);
|
||||
|
||||
static inline vector signed long long
|
||||
_negatell2 (vector signed long long x)
|
||||
{
|
||||
vector signed int zero = (vector signed int){0,0,0,0};
|
||||
vector signed int borrow;
|
||||
|
||||
borrow = spu_genb(zero, (vec_int4)x);
|
||||
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
|
||||
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
|
||||
}
|
||||
|
||||
// lldivi2 - for each of two signed long long interger slots, compute quotient and remainder of
|
||||
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
|
||||
|
||||
lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom)
|
||||
{
|
||||
lldivi2_t res;
|
||||
vec_ullong2 numerAbs, denomAbs;
|
||||
vec_uint4 numerPos, denomPos, quotNeg;
|
||||
|
||||
vec_uint4 denomZeros, numerZeros;
|
||||
vec_int4 shift;
|
||||
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
|
||||
vec_ullong2 quot, newQuot;
|
||||
vec_ullong2 newNum, skip, cont;
|
||||
int anyCont;
|
||||
|
||||
// Determine whether result needs sign change
|
||||
|
||||
numerPos = spu_cmpgt((vec_int4)numer, -1);
|
||||
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
|
||||
denomPos = spu_cmpgt((vec_int4)denom, -1);
|
||||
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
|
||||
quotNeg = spu_xor( numerPos, denomPos );
|
||||
|
||||
// Use absolute values of numerator, denominator
|
||||
|
||||
numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos);
|
||||
denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos);
|
||||
|
||||
// Get difference of leading zeros.
|
||||
|
||||
denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs );
|
||||
numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs );
|
||||
|
||||
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
|
||||
|
||||
// Shift denom to align leading one with numerator's
|
||||
|
||||
denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift );
|
||||
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
|
||||
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) );
|
||||
|
||||
// Shift left all leading zeros.
|
||||
|
||||
denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros );
|
||||
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
|
||||
|
||||
quot = spu_splats(0ull);
|
||||
|
||||
do
|
||||
{
|
||||
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
|
||||
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
|
||||
|
||||
newQuot = spu_or( quot, oneShifted );
|
||||
|
||||
// Subtract shifted denominator from remaining numerator
|
||||
// when denominator is not greater.
|
||||
|
||||
skip = ll_spu_cmpgt( denomShifted, numerAbs );
|
||||
newNum = ll_spu_sub( numerAbs, denomShifted );
|
||||
|
||||
// If denominator is greater, next shift is one more, otherwise
|
||||
// next shift is number of leading zeros of remaining numerator.
|
||||
|
||||
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
|
||||
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
|
||||
|
||||
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
|
||||
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
|
||||
|
||||
quot = spu_sel( newQuot, quot, skip );
|
||||
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
|
||||
}
|
||||
while ( anyCont );
|
||||
|
||||
res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg);
|
||||
res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef HUGE_VALL
|
||||
#define HUGE_VALL __builtin_huge_vall ()
|
||||
#endif
|
||||
|
||||
#ifndef DBL_INF
|
||||
#define DBL_INF ((long long)0x7FF0000000000000ull)
|
||||
#endif
|
||||
|
||||
#ifndef DBL_NAN
|
||||
#define DBL_NAN ((long long)0x7FF8000000000000ull)
|
||||
#endif
|
||||
|
||||
vector double
|
||||
logbd2 (vector double x)
|
||||
{
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
|
||||
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
|
||||
|
||||
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
|
||||
|
||||
vec_ullong2 isnan, isinf, iszero;
|
||||
vec_double2 logb = (vec_double2)zero;
|
||||
vec_llong2 e1, e2;
|
||||
vec_uint4 cmpgt, cmpeq, cmpzr;
|
||||
vec_int4 lz, lz0, lz1;
|
||||
|
||||
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
|
||||
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
|
||||
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_shuffle( cmpgt, cmpgt, odd ) ) );
|
||||
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
|
||||
|
||||
//INF: x is infinite (all-ones exponent and zero mantissa)
|
||||
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
|
||||
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf );
|
||||
|
||||
//HUGE_VAL: x is zero (zero exponent and zero mantissa)
|
||||
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
|
||||
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
|
||||
logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero );
|
||||
|
||||
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
|
||||
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
|
||||
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
|
||||
|
||||
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
|
||||
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
|
||||
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
|
||||
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
|
||||
|
||||
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
|
||||
spu_nor( isnan, spu_or( isinf, iszero ) ) );
|
||||
|
||||
return logb;
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
/* nextafterd2 - find next representable floating-point value towards 2nd param.
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector double
|
||||
nextafterd2 (vector double xx, vector double yy)
|
||||
{
|
||||
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
|
||||
vec_uint4 is_sub, is_zerox, is_zeroy;
|
||||
vec_uint4 is_equal, is_infy, is_nany;
|
||||
vec_uint4 res0, res1, res;
|
||||
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
|
||||
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
|
||||
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
|
||||
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
|
||||
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
|
||||
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
|
||||
|
||||
// mask sign bit
|
||||
abs_x = spu_and( (vec_uint4)xx, msk_abs);
|
||||
abs_y = spu_and( (vec_uint4)yy, msk_abs);
|
||||
|
||||
is_zerox = spu_cmpeq( abs_x, vec_zero);
|
||||
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
|
||||
|
||||
// -0 exception
|
||||
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
|
||||
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
|
||||
|
||||
// if same sign |y| < |x| -> decrease
|
||||
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
|
||||
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
|
||||
is_sub = spu_or(is_sub, abs_dif);
|
||||
is_sub = spu_rlmaska(is_sub, -31);
|
||||
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
|
||||
|
||||
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
|
||||
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
|
||||
res = spu_sel( res0, res1, is_sub); // select increase or decrease
|
||||
res = spu_or( res, sign_x); // set sign
|
||||
|
||||
// check exception
|
||||
// 0 -> -1
|
||||
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
|
||||
|
||||
// check equal (include 0,-0)
|
||||
is_zeroy = spu_cmpeq( abs_y, vec_zero);
|
||||
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
|
||||
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
|
||||
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
|
||||
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
|
||||
res = spu_sel(res, (vec_uint4)yy, is_equal);
|
||||
|
||||
// check nan
|
||||
is_infy = spu_cmpeq( abs_y, msk_exp);
|
||||
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
|
||||
is_nany = spu_and( abs_y, msk_exp);
|
||||
is_nany = spu_cmpeq( is_nany, msk_exp);
|
||||
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
|
||||
is_nany = spu_sel( is_nany, vec_zero, is_infy);
|
||||
res = spu_sel(res, (vec_uint4)yy, is_nany);
|
||||
|
||||
return (vec_double2)res;
|
||||
}
|
||||
@@ -1,72 +0,0 @@
|
||||
/* powf4 -
|
||||
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector float
|
||||
powf4 (vector float x, vector float y)
|
||||
{
|
||||
vec_int4 zeros = spu_splats((int)0);
|
||||
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
|
||||
|
||||
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
|
||||
|
||||
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
|
||||
vec_float4 absx = spu_andc(x, sbit);
|
||||
vec_float4 absy = spu_andc(y, sbit);
|
||||
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
|
||||
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
|
||||
|
||||
vec_float4 res = exp2f4(spu_mul(y, log2f4(absx)));
|
||||
res = spu_sel(res, spu_or(sbit, res), negmask);
|
||||
|
||||
|
||||
return spu_sel(res, (vec_float4)zeros, zeromask);
|
||||
}
|
||||
|
||||
/*
|
||||
{
|
||||
vec_int4 zeros = spu_splats(0);
|
||||
vec_int4 ones = (vec_int4)spu_splats((char)0xFF);
|
||||
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
|
||||
vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y);
|
||||
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
|
||||
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
|
||||
vec_float4 absx = spu_andc(x, sbit);
|
||||
vec_float4 absy = spu_andc(y, sbit);
|
||||
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
|
||||
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
@@ -1,313 +0,0 @@
/* A vector double is returned that contains the remainder xi REM yi,
   for the corresponding elements of vector double x and vector double y.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#include <simdmath.h>
#include <spu_intrinsics.h>

static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);

vector double
remainderd2(vector double x, vector double yy)
{
  vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
  vec_uint4 y_hi;
  vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
  vec_uint4 bias;
  vec_uint4 nan_out, overflow;
  vec_uint4 result;
  vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
  vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
  vec_uint4 exp_mask  = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
  vec_uint4 val_nan   = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
  vec_uint4 vec_zero  = spu_splats((unsigned int)0);
  vec_uint4 is_zeroy;

  // cut sign
  abs_x  = spu_andc((vec_uint4)x, sign_mask);
  abs_yy = spu_andc((vec_uint4)yy, sign_mask);
  y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);

  // check nan out
  is_zeroy = spu_cmpeq(abs_yy, vec_zero);
  is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
  nan_out = _vec_gt64_half(abs_yy, exp_mask);              // y > 7FF00000
  nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax));  // x >= 7FF0000000000000
  nan_out = spu_or(nan_out, is_zeroy);                     // y = 0
  nan_out = spu_shuffle(nan_out, nan_out, splat_hi);

  // make y x2
  abs_2y = _twice(abs_yy);   // 2 x y

  /*
   * use fmodd2 function
   */
  // get remainder of y x2
  // result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y);
  {
    vec_double2 y = (vec_double2)abs_2y;

    int shiftx0, shiftx1, shifty0, shifty1;
    vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
    vec_uchar16 propagate  = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
    // vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
    vec_int4 n, shift;
    vec_uint4 exp_x, exp_y;
    // , sign;
    // vec_uint4 abs_x, abs_y;
    vec_uint4 abs_y;
    vec_uint4 mant_x, mant_x0, mant_x1;
    vec_uint4 mant_y, mant_y0, mant_y1;
    vec_uint4 mant_0, mant_1;
    vec_uint4 mant_r, mant_l;
    // vec_uint4 result;
    vec_uint4 result0, resultx;
    vec_uint4 zero_x, zero_y;
    vec_uint4 denorm_x, denorm_y;
    vec_uint4 cnt, cnt_x, cnt_y;
    vec_uint4 shift_x, shift_y;
    vec_uint4 adj_x, adj_y;
    vec_uint4 z, borrow, mask;
    vec_uint4 lsb       = (vec_uint4)(spu_splats(0x0000000000000001ULL));
    // vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
    vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
    vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
    // vec_uint4 exp_mask  = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
    vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
    // vec_uint4 vec_zero  = spu_splats((unsigned int)0);

    // sign  = spu_and( (vec_uint4)x, sign_mask);
    // abs_x = spu_andc((vec_uint4)x, sign_mask);
    abs_y = spu_andc((vec_uint4)y, sign_mask);
    exp_x = spu_rlmask(abs_x, -20);
    exp_y = spu_rlmask(abs_y, -20);
    // get shift count for denorm
    cnt_x = spu_cntlz(abs_x);
    cnt_y = spu_cntlz(abs_y);
    cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
    cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));

    zero_x = spu_cmpgt(cnt_x, 63);   // zero ?
    zero_y = spu_cmpgt(cnt_y, 63);   // zero ?
    result0 = spu_or(zero_x, zero_y);
    result0 = spu_shuffle(result0, result0, splat_hi);

    // 0 - (cnt_x - 11) = 11 - cnt_x
    shift_x = spu_add(cnt_x, -11);
    shift_y = spu_add(cnt_y, -11);
    cnt_x = spu_sub(11, cnt_x);
    cnt_y = spu_sub(11, cnt_y);

    // count to normalize
    adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
    adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
    adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
    adj_y = spu_shuffle(adj_y, adj_y, splat_hi);

    // for denorm
    shiftx0 = spu_extract(shift_x, 0);
    shiftx1 = spu_extract(shift_x, 2);
    shifty0 = spu_extract(shift_y, 0);
    shifty1 = spu_extract(shift_y, 2);
    mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
    mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
    mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
    mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
    mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
    mant_y = spu_sel(mant_y0, mant_y1, merge_sel);

    denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
    denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
    mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
    mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
    mant_x = spu_or(mant_x, implied_1);   // hidden bit
    mant_y = spu_or(mant_y, implied_1);   // hidden bit

    // x < y ?
    resultx = _vec_gt64(abs_y, abs_x);

    n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
    mask = spu_cmpgt(n, 0);
    mask = spu_andc(mask, resultx);

    while (spu_extract(spu_gather(mask), 0)) {
      borrow = spu_genb(mant_x, mant_y);
      borrow = spu_shuffle(borrow, borrow, propagate);
      z = spu_subx(mant_x, mant_y, borrow);

      result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);

      mant_x = spu_sel(mant_x,
                       spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
                       mask);

      n = spu_add(n, -1);
      mask = spu_cmpgt(n, 0);
    }

    borrow = spu_genb(mant_x, mant_y);
    borrow = spu_shuffle(borrow, borrow, propagate);
    z = spu_subx(mant_x, mant_y, borrow);
    mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
    result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);

    // bring back to original range
    mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
    mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));

    // for adj_y < 0 exp max=1
    shiftx0 = spu_extract(adj_y, 0);
    shiftx1 = spu_extract(adj_y, 2);
    mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
    mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
    mant_r = spu_sel(mant_x0, mant_x1, merge_sel);

    // for adj_y >= 0
    cnt = spu_cntlz(mant_x);
    cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
    cnt = spu_add(cnt, -11);
    cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0));   // for exp >= 1
    shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
    shiftx0 = spu_extract(shift, 0);
    shiftx1 = spu_extract(shift, 2);
    mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
    mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
    mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
    cnt = spu_sub(adj_y, (vec_uint4)shift);
    mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));

    result = spu_sel(mant_l, mant_r, denorm_y);
    result = spu_sel(result, vec_zero, result0);   // remainder 0
    result = spu_sel(result, abs_x, resultx);      // x < y
    // result = spu_xor(result, sign);             // set sign

    // return ((vec_double2)result);
  }

  // abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
  abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));

  /* if (2*x > y)
   *   x -= y
   *   if (2*x >= y) x -= y
   */
  overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
  // make x2
  abs_2x = _twice(abs_x);   // 2 x x

  bias = _vec_gt64(abs_2x, abs_yy);   // abs_2x > abs_yy
  bias = spu_andc(bias, overflow);

  abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);

  overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi));   // went negative

  // make x2
  abs_2x = _twice(spu_andc(abs_x, sign_mask));   // 2 x x (negative not supported)
  bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
  bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
  abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);

  /* select final answer
   */
  result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask));   // set sign
  result = spu_sel(result, val_nan, nan_out);                  // if nan

  return ((vec_double2)result);
}

/*
 * subtraction function in limited condition
 */
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
  // which is the bigger input, aa or bb ?
  vec_uint4 is_bigb = _vec_gt64(bb, aa);   // bb > aa

  // need denorm calc ?
  vec_uint4 norm_a, norm_b;
  norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_a = spu_and(norm_a, norm_b);
  norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // calc (aa - bb) and (bb - aa)
  vec_uint4 res_a, res_b, res;
  vec_uint4 borrow_a, borrow_b;
  vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
  borrow_a = spu_genb(aa, bb);
  borrow_b = spu_genb(bb, aa);
  borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
  borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
  res_a = spu_subx(aa, bb, borrow_a);
  res_b = spu_subx(bb, aa, borrow_b);
  res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0}));   // set sign

  res = spu_sel(res_a, res_b, is_bigb);   // select (aa - bb) or (bb - aa)
  // select normal calc or special
  res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);

  return res;
}

/*
 * extend spu_cmpgt function to 64bit data
 */
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt = spu_cmpgt(aa, bb);   // aa > bb
  vec_uint4 eq = spu_cmpeq(aa, bb);   // aa = bb
  return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));   // only the high word of each result is valid
}

static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt_hi = _vec_gt64_half(aa, bb);   // only the high word of each result is valid
  return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}

/*
 * double formatted x2
 */
static inline vec_uint4 _twice(vec_uint4 aa)
{
  vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));   // exp > 0
  norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // if denorm or zero << 1 , if norm exp + 1
  return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm);   // x2
}
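The mantissa loop above is classic restoring long division that keeps only the remainder: align the two mantissas by exponent, then for each exponent step conditionally subtract and shift left. A scalar integer sketch of the same loop, assuming both mantissas already carry the implied leading 1 (the function name mant_mod is mine):

#include <stdint.h>

/* Scalar sketch of the fmod-style mantissa loop: for n = exp_x - exp_y
 * iterations, subtract the divisor mantissa when it fits, then shift;
 * finish with one last conditional subtract. */
static uint64_t mant_mod(uint64_t mant_x, uint64_t mant_y, int n)
{
    while (n-- > 0) {
        if (mant_x >= mant_y)
            mant_x = (mant_x - mant_y) << 1;   /* subtract then shift */
        else
            mant_x <<= 1;                      /* just shift */
    }
    if (mant_x >= mant_y)                      /* final subtract */
        mant_x -= mant_y;
    return mant_x;                             /* remainder mantissa */
}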
@@ -1,107 +0,0 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#include <simdmath.h>
#include <spu_intrinsics.h>

//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
remainderf4 (vector float x, vector float y)
{
  vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
  vec_int4 qi0, qi1, qi2;
  vec_float4 i0, i1, i2, i, rem;
  vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;

  // Find i = truncated_integer(|x/y|)

  // By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
  // Thus the exact truncation is either the truncated quotient, one less, or one greater.

  q = divf4( x, y );
  xabs = fabsf4( x );
  yabs = fabsf4( y );
  qabs = fabsf4( q );
  xabs2 = spu_add( xabs, xabs );

  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
  inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );

  qi1 = spu_convts( qabs, 0 );
  qi0 = spu_add( qi1, -1 );
  qi2 = spu_add( qi1, 1 );

  odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
  odd0 = odd2 = spu_nor( odd1, odd1 );

  i0 = spu_convtf( qi0, 0 );
  i1 = spu_convtf( qi1, 0 );
  i2 = spu_convtf( qi2, 0 );

  // Correct i will be the largest one such that |x| - i*|y| >= 0.  Can test instead as
  // 2*|x| - i*|y| >= |x|:
  //
  // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
  // Thus |x| - i*|y| may be < 0 but still round to zero.  However, if 2*|x| - i*|y| < |x|, the computed
  // answer will be rounded down to < |x|.  2*|x| can be represented exactly provided |x| < 2^128.

  cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
  cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );

  i = i0;
  i = spu_sel( i1, i, cmp1 );
  i = spu_sel( i2, i, cmp2 );

  odd = odd0;
  odd = spu_sel( odd1, odd, cmp1 );
  odd = spu_sel( odd2, odd, cmp2 );

  rem = spu_nmsub( i, yabs, xabs );

  // Test whether i or i+1 = nearest_integer(|x/y|)
  //
  // i+1 is correct if:
  //
  //    rem > 0.5*|y|
  // or
  //    rem = 0.5*|y| and i is odd

  yabshalf = spu_mul( yabs, spu_splats(0.5f) );
  cmp1 = spu_cmpgt( rem, yabshalf );
  cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );

  i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
  i = copysignf4( i, q );

  return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
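Since the divf4 quotient is within 1.0 of the true value, the exact truncated quotient is one of q-1, q, q+1, and the code picks the largest candidate i with 2|x| - i*|y| >= |x| to stay safe under rounding. A scalar sketch of that candidate test (pick_trunc_quotient is my name for it):

#include <math.h>

/* Scalar sketch of remainderf4's bracketing: try i in {q-1, q, q+1}
 * and keep the largest one passing 2|x| - i*|y| >= |x|, computed
 * with a fused multiply-add like the vector nmsub. */
static float pick_trunc_quotient(float x, float y)
{
    float xabs = fabsf(x), yabs = fabsf(y);
    float q = truncf(fabsf(x / y));
    float cand[3] = { q - 1.0f, q, q + 1.0f };
    float i = cand[0];
    for (int k = 1; k < 3; ++k)
        if (fmaf(-cand[k], yabs, 2.0f * xabs) >= xabs)  /* nmsub analogue */
            i = cand[k];
    return i;
}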
@@ -1,356 +0,0 @@
/* remquod2 - for each of two double slots, compute the remainder and the low bits of the quotient of x/y.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>

/*
 * This function returns the same vector double result as remainderd2().
 * In addition a vector signed long long is stored in *quo,
 * that contains the corresponding element values whose sign is
 * the sign of xi / yi and whose magnitude is congruent modulo 2^n to
 * the magnitude of the integral quotient of xi / yi, where n is
 * an implementation-defined integer greater than or equal to 3.
 */

static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);

vector double
remquod2(vector double x, vector double yy, vector signed long long *quo)
{
  vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
  vec_int4 quotient, quotient0;
  vec_uint4 y_hi;
  vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y;
  vec_uint4 bias;
  vec_uint4 nan_out, not_ge, quo_pos, overflow;
  vec_uint4 result;
  vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
  vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
  vec_uint4 exp_mask  = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
  vec_uint4 val_nan   = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
  vec_uint4 vec_zero  = spu_splats((unsigned int)0);
  vec_uint4 is_zeroy;

  // cut sign
  abs_x  = spu_andc((vec_uint4)x, sign_mask);
  abs_yy = spu_andc((vec_uint4)yy, sign_mask);
  y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);

  quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1);
  quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);

  // check nan out
  is_zeroy = spu_cmpeq(abs_yy, vec_zero);
  is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
  nan_out = _vec_gt64_half(abs_yy, exp_mask);              // y > 7FF00000
  nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax));  // x >= 7FF0000000000000
  nan_out = spu_or(nan_out, is_zeroy);                     // y = 0
  nan_out = spu_shuffle(nan_out, nan_out, splat_hi);

  // make y x8
  abs_2y = _twice(abs_yy);   // 2 x y
  abs_4y = _twice(abs_2y);   // 4 x y
  abs_8y = _twice(abs_4y);   // 8 x y

  /*
   * use fmodd2 function
   */
  // get remainder of y x8
  // result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y);
  {
    vec_double2 y = (vec_double2)abs_8y;

    int shiftx0, shiftx1, shifty0, shifty1;
    vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
    vec_uchar16 propagate  = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
    // vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
    vec_int4 n, shift;
    vec_uint4 exp_x, exp_y;
    // , sign;
    // vec_uint4 abs_x, abs_y;
    vec_uint4 abs_y;
    vec_uint4 mant_x, mant_x0, mant_x1;
    vec_uint4 mant_y, mant_y0, mant_y1;
    vec_uint4 mant_0, mant_1;
    vec_uint4 mant_r, mant_l;
    // vec_uint4 result;
    vec_uint4 result0, resultx;
    vec_uint4 zero_x, zero_y;
    vec_uint4 denorm_x, denorm_y;
    vec_uint4 cnt, cnt_x, cnt_y;
    vec_uint4 shift_x, shift_y;
    vec_uint4 adj_x, adj_y;
    vec_uint4 z, borrow, mask;
    vec_uint4 lsb       = (vec_uint4)(spu_splats(0x0000000000000001ULL));
    // vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
    vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
    vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
    // vec_uint4 exp_mask  = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
    vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
    // vec_uint4 vec_zero  = spu_splats((unsigned int)0);

    // sign  = spu_and( (vec_uint4)x, sign_mask);
    // abs_x = spu_andc((vec_uint4)x, sign_mask);
    abs_y = spu_andc((vec_uint4)y, sign_mask);
    exp_x = spu_rlmask(abs_x, -20);
    exp_y = spu_rlmask(abs_y, -20);
    // get shift count for denorm
    cnt_x = spu_cntlz(abs_x);
    cnt_y = spu_cntlz(abs_y);
    cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
    cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));

    zero_x = spu_cmpgt(cnt_x, 63);   // zero ?
    zero_y = spu_cmpgt(cnt_y, 63);   // zero ?
    result0 = spu_or(zero_x, zero_y);
    result0 = spu_shuffle(result0, result0, splat_hi);

    // 0 - (cnt_x - 11) = 11 - cnt_x
    shift_x = spu_add(cnt_x, -11);
    shift_y = spu_add(cnt_y, -11);
    cnt_x = spu_sub(11, cnt_x);
    cnt_y = spu_sub(11, cnt_y);

    // count to normalize
    adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
    adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
    adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
    adj_y = spu_shuffle(adj_y, adj_y, splat_hi);

    // for denorm
    shiftx0 = spu_extract(shift_x, 0);
    shiftx1 = spu_extract(shift_x, 2);
    shifty0 = spu_extract(shift_y, 0);
    shifty1 = spu_extract(shift_y, 2);
    mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
    mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
    mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
    mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
    mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
    mant_y = spu_sel(mant_y0, mant_y1, merge_sel);

    denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
    denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
    mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
    mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
    mant_x = spu_or(mant_x, implied_1);   // hidden bit
    mant_y = spu_or(mant_y, implied_1);   // hidden bit

    // x < y ?
    resultx = _vec_gt64(abs_y, abs_x);

    n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
    mask = spu_cmpgt(n, 0);
    mask = spu_andc(mask, resultx);

    while (spu_extract(spu_gather(mask), 0)) {
      borrow = spu_genb(mant_x, mant_y);
      borrow = spu_shuffle(borrow, borrow, propagate);
      z = spu_subx(mant_x, mant_y, borrow);

      result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);

      mant_x = spu_sel(mant_x,
                       spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
                       mask);

      n = spu_add(n, -1);
      mask = spu_cmpgt(n, 0);
    }

    borrow = spu_genb(mant_x, mant_y);
    borrow = spu_shuffle(borrow, borrow, propagate);
    z = spu_subx(mant_x, mant_y, borrow);
    mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
    result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);

    // bring back to original range
    mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
    mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));

    // for adj_y < 0 exp max=1
    shiftx0 = spu_extract(adj_y, 0);
    shiftx1 = spu_extract(adj_y, 2);
    mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
    mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
    mant_r = spu_sel(mant_x0, mant_x1, merge_sel);

    // for adj_y >= 0
    cnt = spu_cntlz(mant_x);
    cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
    cnt = spu_add(cnt, -11);
    cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0));   // for exp >= 1
    shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
    shiftx0 = spu_extract(shift, 0);
    shiftx1 = spu_extract(shift, 2);
    mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
    mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
    mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
    cnt = spu_sub(adj_y, (vec_uint4)shift);
    mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));

    result = spu_sel(mant_l, mant_r, denorm_y);
    result = spu_sel(result, vec_zero, result0);   // remainder 0
    result = spu_sel(result, abs_x, resultx);      // x < y
    // result = spu_xor(result, sign);             // set sign

    // return ((vec_double2)result);
  }

  // if y would overflow when made x8 (exp+3, 7FF-7FC)
  // abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
  abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));

  /* if (x >= 4*y)
   *   x -= 4*y
   *   quotient = 4
   * else
   *   quotient = 0
   */
  overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF));

  not_ge = _vec_gt64(abs_4y, abs_x);
  not_ge = spu_or(not_ge, overflow);
  abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge);
  quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge);

  /* if (x >= 2*y)
   *   x -= 2*y
   *   quotient += 2
   */
  overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF));

  not_ge = _vec_gt64(abs_2y, abs_x);   // abs_2y > abs_x
  not_ge = spu_or(not_ge, overflow);

  abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge);
  quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);

  /* if (2*x > y)
   *   x -= y
   *   if (2*x >= y) x -= y
   */
  overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
  // make x2
  abs_2x = _twice(abs_x);   // 2 x x

  bias = _vec_gt64(abs_2x, abs_yy);   // abs_2x > abs_yy
  bias = spu_andc(bias, overflow);

  abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
  quotient = spu_sub(quotient, (vec_int4)bias);

  overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi));   // went negative

  // make x2
  abs_2x = _twice(spu_andc(abs_x, sign_mask));   // 2 x x (negative not supported)
  bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
  bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
  abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
  quotient = spu_sub(quotient, (vec_int4)bias);

  /* select final answer
   */
  result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask));   // set sign
  result = spu_sel(result, val_nan, nan_out);                  // if nan

  quotient = spu_and(quotient, ((vec_int4){0,7,0,7}));   // limit to 3 bits
  quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4));
  quotient = spu_sel(quotient0, quotient, quo_pos);

  *quo = (vec_llong2)quotient;

  return ((vec_double2)result);
}

/*
 * subtraction function in limited condition
 */
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
  // which is the bigger input, aa or bb ?
  vec_uint4 is_bigb = _vec_gt64(bb, aa);   // bb > aa

  // need denorm calc ?
  vec_uint4 norm_a, norm_b;
  norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_a = spu_and(norm_a, norm_b);
  norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // calc (aa - bb) and (bb - aa)
  vec_uint4 res_a, res_b, res;
  vec_uint4 borrow_a, borrow_b;
  vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
  borrow_a = spu_genb(aa, bb);
  borrow_b = spu_genb(bb, aa);
  borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
  borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
  res_a = spu_subx(aa, bb, borrow_a);
  res_b = spu_subx(bb, aa, borrow_b);
  res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0}));   // set sign

  res = spu_sel(res_a, res_b, is_bigb);   // select (aa - bb) or (bb - aa)
  // select normal calc or special
  res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);

  return res;
}

/*
 * extend spu_cmpgt function to 64bit data
 */
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt = spu_cmpgt(aa, bb);   // aa > bb
  vec_uint4 eq = spu_cmpeq(aa, bb);   // aa = bb
  return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));   // only the high word of each result is valid
}

static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt_hi = _vec_gt64_half(aa, bb);   // only the high word of each result is valid
  return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}

/*
 * double formatted x2
 */
static inline vec_uint4 _twice(vec_uint4 aa)
{
  vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));   // exp > 0
  norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // if denorm or zero << 1 , if norm exp + 1
  return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm);   // x2
}
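The *quo contract above (sign of x/y, magnitude congruent modulo 2^n to the integral quotient, n >= 3) is the same one C99 specifies for scalar remquo(), so the standard function doubles as a usage example:

#include <math.h>
#include <stdio.h>

/* The C99 scalar counterpart of remquod2: remquo() returns the
 * remainder and stores at least 3 low bits of the signed quotient. */
int main(void)
{
    int q;
    double r = remquo(29.0, 8.0, &q);           /* 29/8 = 3.625 -> nearest integer 4 */
    printf("rem=%f quo(low bits)=%d\n", r, q);  /* prints rem=-3.000000 quo(low bits)=4 */
    return 0;
}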
@@ -1,96 +0,0 @@
/* rsqrtd2 - for each of two double slots, compute reciprocal square root.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#include <simdmath.h>
#include <spu_intrinsics.h>

//
// Handles exceptional values as follows:
// NaN     -> NaN
// (+,-)0  -> (+,-)Inf
// +Inf    -> +0
// -Inf    -> NaN
// -Finite -> NaN
// Denormal inputs are treated as zero.

vector double rsqrtd2 (vector double x)
{
  vec_ullong2 expmask, onemask, signmask, evenexp;
  vec_double2 half, one, man, exp, nexp, y1, y2, y3, zero, inf, nan, result;
  vec_float4 halff, onef, manf, y0f, y1f;

  expmask = spu_splats(0x7ff0000000000000ull);
  onemask = spu_splats(0x0010000000000000ull);
  signmask = spu_splats(0x8000000000000000ull);
  onef = spu_splats(1.0f);
  one = spu_extend( onef );
  halff = spu_splats(0.5f);
  half = spu_extend( halff );

  // Factor input ( mantissa x 2^exponent ) into ( mantissa x 2^(-i) ) and ( 2^(exponent+i) )
  // where i = 0 when exponent is even and i = 1 when exponent is odd.
  //
  // Compute reciprocal-square-root of second factor by finding -(exponent+i)/2:
  //
  // biased_exp = 1023 + exponent
  // new_biased_exp = 1023 - (exponent+i)/2
  //                = 1023 - (biased_exp-1023+i)/2
  //                = (3069 - (biased_exp+i)) / 2

  evenexp = spu_and( (vec_ullong2)x, onemask );
  man = spu_sel( x, (vec_double2)spu_add( spu_splats(0x3fe00000u), (vec_uint4)evenexp ), expmask );

  exp = spu_and( x, (vec_double2)expmask );
  nexp = spu_or( exp, (vec_double2)onemask );
  nexp = (vec_double2)spu_rlmask( spu_sub( (vec_uint4)spu_splats(0xbfd0000000000000ull), (vec_uint4)nexp ), -1 );

  // Compute mantissa part in single precision.
  // Convert back to double and multiply with 2^(-(exponent+i)/2), then
  // do two Newton-Raphson steps for full precision.

  manf = spu_roundtf( man );
  y0f = spu_rsqrte( manf );
  y1f = spu_madd( spu_mul( y0f, halff ), spu_nmsub( y0f, spu_mul( y0f, manf ), onef ), y0f );
  y1 = spu_mul( spu_extend( y1f ), nexp );
  y2 = spu_madd( spu_mul( y1, half ), spu_nmsub( y1, spu_mul( y1, x ), one ), y1 );
  y3 = spu_madd( spu_mul( y2, half ), spu_nmsub( y2, spu_mul( y2, x ), one ), y2 );

  // Choose iterated result or special value.

  zero = spu_and( x, (vec_double2)signmask );
  inf = spu_sel( (vec_double2)expmask, x, signmask );
  nan = (vec_double2)spu_splats(0x7ff8000000000000ull);

  result = spu_sel( y3, zero, isinfd2 ( x ) );
  result = spu_sel( result, nan, signbitd2 ( x ) );
  result = spu_sel( result, inf, is0denormd2 ( x ) );

  return result;
}
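The refinement chain above starts from the hardware estimate spu_rsqrte and applies Newton-Raphson steps of the form y' = y + (y/2)*(1 - x*y*y), each of which roughly doubles the number of correct bits. A scalar sketch of just that refinement (rsqrt_refine is my name; the seed y0 would come from an estimate instruction or table):

#include <math.h>

/* Scalar sketch of rsqrtd2's Newton-Raphson refinement for 1/sqrt(x):
 * y' = y + 0.5*y*(1 - x*y*y). Two steps take a ~12-bit seed to
 * roughly double precision. */
static double rsqrt_refine(double x, double y0)
{
    double y = y0;
    y = y + 0.5 * y * (1.0 - x * y * y);   /* first step  */
    y = y + 0.5 * y * (1.0 - x * y * y);   /* second step */
    return y;
}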
116
Extras/simdmathlibrary/spu/simdmath/_lldiv.h
Normal file
@@ -0,0 +1,116 @@
/* Common functions for lldivi2/lldivu2
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_LLDIV_H___
#define ___SIMD_MATH_LLDIV_H___

#include <spu_intrinsics.h>

static inline vector unsigned long long
__ll_spu_cntlz(vector unsigned long long x)
{
  vec_uint4 cnt;

  cnt = spu_cntlz((vec_uint4)x);
  cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
  cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));

  return (vec_ullong2)cnt;
}

static inline vector unsigned long long
__ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
  vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
  vec_ullong2 x_upper, x_lower;

  // shift upper word
  x_upper = spu_and(x, mask);
  x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
  x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));

  // shift lower word
  x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
  x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));

  return spu_sel(x_lower, x_upper, mask);
}

static inline vector unsigned long long
__ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
  vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
  vec_ullong2 x_upper, x_lower;
  vec_uint4 cnt_byte;

  cnt_byte = spu_add((vec_uint4)count, 7);

  // shift upper word
  x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
  x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));

  // shift lower word
  x_lower = spu_andc(x, mask);
  x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
  x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));

  return spu_sel(x_lower, x_upper, mask);
}

static inline vector unsigned long long
__ll_spu_cmpeq_zero(vector unsigned long long x)
{
  vec_uint4 cmp;

  cmp = spu_cmpeq((vec_uint4)x, 0);
  return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}

static inline vector unsigned long long
__ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
  vec_uint4 gt;

  gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
  gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
  return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}

static inline vector unsigned long long
__ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
  vec_uint4 borrow;

  borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
  borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
  return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}

#endif // ___SIMD_MATH_LLDIV_H___
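__ll_spu_cntlz builds a 64-bit leading-zero count from two 32-bit counts: the high word's count, plus the low word's count only when the high word is entirely zero (count == 32). A scalar sketch of that composition rule (clz32/clz64 are my names):

#include <stdint.h>

/* Scalar sketch of the 64-bit count-leading-zeros composition used
 * by __ll_spu_cntlz: clz64 = clz32(hi) + (hi == 0 ? clz32(lo) : 0). */
static unsigned clz32(uint32_t v)
{
    unsigned n = 0;
    if (v == 0) return 32;
    while (!(v & 0x80000000u)) { v <<= 1; ++n; }
    return n;
}

static unsigned clz64(uint64_t v)
{
    unsigned hi = clz32((uint32_t)(v >> 32));
    return (hi == 32) ? 32 + clz32((uint32_t)v) : hi;
}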
84
Extras/simdmathlibrary/spu/simdmath/_remainder.h
Normal file
@@ -0,0 +1,84 @@
/* Internal helper routines shared by the vector double remainder functions.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH__REMAINDER_H___
#define ___SIMD_MATH__REMAINDER_H___

#include <simdmath/_vec_utils.h>

/*
 * double formatted x2
 */
static inline vec_uint4
__rem_twice_d(vec_uint4 aa)
{
  vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));   // exp > 0
  norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // if denorm or zero << 1 , if norm exp + 1
  return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm);   // x2
}

/*
 * subtraction function in limited condition
 */
static inline vec_uint4
__rem_sub_d(vec_uint4 aa, vec_uint4 bb)
{
  // which is the bigger input, aa or bb ?
  vec_uint4 is_bigb = __vec_gt64(bb, aa);   // bb > aa

  // need denorm calc ?
  vec_uint4 norm_a, norm_b;
  norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
  norm_a = spu_and(norm_a, norm_b);
  norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));

  // calc (aa - bb) and (bb - aa)
  vec_uint4 res_a, res_b, res;
  vec_uint4 borrow_a, borrow_b;
  vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
  borrow_a = spu_genb(aa, bb);
  borrow_b = spu_genb(bb, aa);
  borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
  borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
  res_a = spu_subx(aa, bb, borrow_a);
  res_b = spu_subx(bb, aa, borrow_b);
  res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0}));   // set sign

  res = spu_sel(res_a, res_b, is_bigb);   // select (aa - bb) or (bb - aa)
  // select normal calc or special
  res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);

  return res;
}

#endif
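__rem_twice_d doubles a non-negative IEEE double entirely in the integer domain: normals get exponent+1 by adding 1 to the exponent field, while denormals and zero get a 1-bit mantissa shift, which naturally carries into the exponent field when the hidden bit is reached. A scalar bit-level sketch (twice_bits is my name):

#include <stdint.h>

/* Scalar sketch of __rem_twice_d on the raw bits of a non-negative
 * double: exponent + 1 when normal, mantissa << 1 when denormal or zero. */
static uint64_t twice_bits(uint64_t bits)
{
    const uint64_t mant_mask = 0x000FFFFFFFFFFFFFULL;
    if (bits > mant_mask)                      /* exponent > 0: normal */
        return bits + 0x0010000000000000ULL;   /* exp += 1 */
    return bits << 1;                          /* denormal or zero */
}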
57
Extras/simdmathlibrary/spu/simdmath/_vec_utils.h
Normal file
@@ -0,0 +1,57 @@
/* Common types for SPU SIMD Math Library
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH__VEC_UTILS_H___
#define ___SIMD_MATH__VEC_UTILS_H___

/*
 * extend spu_cmpgt function to 64bit data
 */
static inline vec_uint4
__vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt = spu_cmpgt(aa, bb);   // aa > bb
  vec_uint4 eq = spu_cmpeq(aa, bb);   // aa = bb
  return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));   // only the high word of each result is valid
}

static inline vec_uint4
__vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt_hi = __vec_gt64_half(aa, bb);   // only the high word of each result is valid
  return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}

static inline vec_uint4
__vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 eq = spu_cmpeq(aa, bb);
  return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}

#endif
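The rule __vec_gt64_half implements is the standard wide-compare composition: a > b for 64-bit values iff the high words compare greater, or the high words are equal and the low words compare greater. A scalar sketch (gt64 is my name):

#include <stdint.h>

/* Scalar sketch of __vec_gt64_half: 64-bit unsigned greater-than
 * built from two 32-bit compares. */
static int gt64(uint64_t a, uint64_t b)
{
    uint32_t ah = (uint32_t)(a >> 32), bh = (uint32_t)(b >> 32);
    uint32_t al = (uint32_t)a,         bl = (uint32_t)b;
    return (ah > bh) || (ah == bh && al > bl);
}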
@@ -27,14 +27,18 @@
   POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ABSI4_H___
#define ___SIMD_MATH_ABSI4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector signed int
absi4 (vector signed int x)
static inline vector signed int
_absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
  vec_int4 neg;
  neg = spu_sub( 0, x );
  return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}

#endif
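_absi4 computes both x and -x and then selects between them with a compare mask, avoiding a branch. A scalar sketch of the same select (absi_branchfree is my name; note that, as with the vector code, INT_MIN maps to itself):

#include <stdint.h>

/* Scalar sketch of _absi4's compute-both-then-select pattern:
 * the condition x > -1 mirrors spu_cmpgt(x, -1). */
static int32_t absi_branchfree(int32_t x)
{
    int32_t neg = 0 - x;               /* spu_sub(0, x) */
    return (x > -1) ? x : neg;         /* spu_sel(neg, x, mask) */
}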
@@ -27,52 +27,56 @@
   POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ACOSF4_H___
#define ___SIMD_MATH_ACOSF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/sqrtf4.h>

//
// Computes the inverse cosine of all four slots of x
//
vector float
acosf4 (vector float x)
static inline vector float
_acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
  vec_float4 result, xabs;
  vec_float4 t1;
  vec_float4 xabs2, xabs4;
  vec_float4 hi, lo;
  vec_float4 neg, pos;
  vec_uint4 select;

xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
  xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
  select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));

t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
  t1 = _sqrtf4(spu_sub( spu_splats(1.0f), xabs));

/* Instruction counts can be reduced if the polynomial was
 * computed entirely from nested (dependent) fma's. However,
 * to reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
 */
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
  /* Instruction counts can be reduced if the polynomial was
   * computed entirely from nested (dependent) fma's. However,
   * to reduce the number of pipeline stalls, the polynomial is evaluated
   * in two halves (hi and lo).
   */
  xabs2 = spu_mul(xabs, xabs);
  xabs4 = spu_mul(xabs2, xabs2);
  hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
  hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
  hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
  lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
  lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
  lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));

result = spu_madd(hi, xabs4, lo);
  result = spu_madd(hi, xabs4, lo);

/* Adjust the result if x is negative.
 */
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
  /* Adjust the result if x is negative.
   */
  neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
  pos = spu_mul(t1, result);

result = spu_sel(pos, neg, select);
  result = spu_sel(pos, neg, select);

return result;
  return result;
}

#endif
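The hi/lo split evaluates the two halves of the degree-6 polynomial independently and joins them with x^4, which shortens the fma dependency chain compared with one nested Horner chain. A scalar sketch of the same evaluation order, using the coefficients from the diff (poly_split is my name):

/* Scalar sketch of the split-Horner evaluation in _acosf4: hi and lo
 * are independent 3-term chains, joined as hi*x^4 + lo. The result
 * is the polynomial core that _acosf4 then scales by sqrt(1 - x). */
static float poly_split(float x)
{
    float x2 = x * x;
    float x4 = x2 * x2;
    float hi = (-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f;
    hi = hi * x + 0.0308918810f;
    float lo = (-0.0501743046f * x + 0.0889789874f) * x - 0.2145988016f;
    lo = lo * x + 1.5707963050f;
    return hi * x4 + lo;
}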
@@ -27,59 +27,66 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_ASINF4_H___
#define ___SIMD_MATH_ASINF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
asinf4 (vector float x)
#include <simdmath/sqrtf4.h>
#include <simdmath/divf4.h>

static inline vector float
_asinf4 (vector float x)
{
  // positive = (x > 0)
  //
  vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
  vec_uint4 positive = spu_cmpgt(x,spu_splats(0.0f));

  // gtHalf = (|x| > 0.5)
  //
  vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
  vec_uint4 gtHalf = spu_cmpabsgt(x,spu_splats(0.5f));

  // x = absf(x)
  //
  x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));

  // if (x > 0.5)
  //   g = 0.5 - 0.5*x
  //   x = -2 * sqrtf(g)
  // else
  //   g = x * x
  //
  vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);

  x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
  x = spu_sel(x,spu_mul(spu_splats(-2.0f),_sqrtf4(g)),gtHalf);

  // Compute the polynomials and take their ratio
  //   denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
  //   num = x * g * (-0.504400557f * g + 0.933933258f)
  //
  vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
  vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
  denom = spu_madd(denom,g,spu_splats(5.603603363f));
  num = spu_mul(spu_mul(x,g),num);

  // x = x + num / denom
  //
  x = spu_add(x,divf4(num,denom));
  x = spu_add(x,_divf4(num,denom));

  // if (x > 0.5)
  //   x = x + M_PI_2
  //
  x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);

  // if (!positive) x = -x
  //
  x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);

  return x;
}

#endif
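For |x| > 0.5 the selects above implement the identity asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)), which keeps the polynomial's argument small. A scalar sketch of that reduction for reference (libm calls stand in for the vector polynomial):

#include <math.h>

/* Scalar sketch of _asinf4's range reduction. */
static float asin_reduced(float x)
{
    float ax = fabsf(x);
    float r;
    if (ax > 0.5f) {
        float g = 0.5f - 0.5f * ax;                 /* g = (1 - |x|)/2 */
        r = (float)M_PI_2 - 2.0f * asinf(sqrtf(g)); /* identity for |x| > 0.5 */
    } else {
        r = asinf(ax);                              /* small-argument range */
    }
    return copysignf(r, x);                         /* restore sign of x */
}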
@@ -27,34 +27,40 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_ATAN2F4_H___
#define ___SIMD_MATH_ATAN2F4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/atanf4.h>
#include <simdmath/divf4.h>

//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
static inline vector float
_atan2f4 (vector float y, vector float x)
{
  vec_float4 res = atanf4(divf4(y,x));
  vec_float4 res = _atanf4(_divf4(y,x));

  // Use the arguments to determine the quadrant of the result:
  // if (x < 0)
  //   if (y < 0)
  //     res = -PI + res
  //   else
  //     res = PI + res
  //
  vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
  vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
  vec_uint4 yNeg = spu_cmpgt(spu_splats(0.0f),y);
  vec_uint4 xNeg = spu_cmpgt(spu_splats(0.0f),x);

  vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);

  vec_float4 newRes = spu_add(bias, res);

  res = spu_sel(res,newRes,xNeg);

  return res;
}

#endif
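The quadrant fix-up mirrors the usual scalar definition of atan2; a minimal scalar sketch of the same branch logic, for comparison:

#include <math.h>

/* Scalar form of the quadrant correction applied by _atan2f4. */
static float atan2_sketch(float y, float x)
{
    float res = atanf(y / x);      /* correct only for x > 0 */
    if (x < 0.0f)                  /* shift into quadrant II (+pi) or III (-pi) */
        res += (y < 0.0f) ? -(float)M_PI : (float)M_PI;
    return res;
}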
@@ -27,50 +27,55 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_ATANF4_H___
#define ___SIMD_MATH_ATANF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/recipf4.h>

//
// Computes the inverse tangent of all four slots of x.
//
vector float
atanf4 (vector float x)
static inline vector float
_atanf4 (vector float x)
{
  vec_float4 bias;
  vec_float4 x2, x3, x4, x8, x9;
  vec_float4 hi, lo;
  vec_float4 result;
  vec_float4 inv_x;
  vec_uint4 sign;
  vec_uint4 select;

  sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
  inv_x = recipf4(x);
  inv_x = _recipf4(x);
  inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));

  select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
  bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
  bias = (vec_float4)spu_and((vec_uint4)bias, select);

  x = spu_sel(x, inv_x, select);

  bias = spu_add(bias, x);
  x2 = spu_mul(x, x);
  x3 = spu_mul(x2, x);
  x4 = spu_mul(x2, x2);
  x8 = spu_mul(x4, x4);
  x9 = spu_mul(x8, x);
  hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
  hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
  hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
  hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
  lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
  lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
  lo = spu_madd(lo, x3, bias);

  result = spu_madd(hi, x9, lo);

  return result;
}

#endif
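For |x| > 1 the code substitutes -1/x and a sign(x)*pi/2 bias, i.e. the identity atan(x) = sign(x)*pi/2 - atan(1/x), so the polynomial only ever sees arguments in [-1, 1]. A scalar sketch:

#include <math.h>

/* Scalar form of the argument reduction _atanf4 performs with selects. */
static float atan_reduced(float x)
{
    if (fabsf(x) > 1.0f) {
        float half_pi = copysignf((float)M_PI_2, x);
        return half_pi + atanf(-1.0f / x);  /* bias + atan(-1/x) */
    }
    return atanf(x);                         /* polynomial range */
}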
@@ -27,79 +27,69 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_CBRTF4_H___
#define ___SIMD_MATH_CBRTF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/frexpf4.h>
#include <simdmath/ldexpf4.h>
#include <simdmath/divf4.h>

#define __calcQuot(xexp) n = xexp; \
  vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
  n = spu_sel(n, spu_add(n,2), negxexpmask); \
  \
  quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
  quot = spu_add(quot, spu_rlmaska(quot, -4)); \
  quot = spu_add(quot, spu_rlmaska(quot, -8)); \
  quot = spu_add(quot, spu_rlmaska(quot,-16)); \
  vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
  quot = spu_add( \
    quot, \
    spu_rlmaska( \
      spu_add( \
        spu_add(r,5), \
        spu_sl (r,2) \
      ), \
      -4 \
    ) \
  ); \
static inline vec_int4
__cbrtf4_calc_quot(vec_int4 n)
{
  vec_int4 quot;
  vec_uint4 negxexpmask = spu_cmpgt(spu_splats(0), n);
  n = spu_sel(n, spu_add(n,2), negxexpmask);

#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
  quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4));
  quot = spu_add(quot, spu_rlmaska(quot, -4));
  quot = spu_add(quot, spu_rlmaska(quot, -8));
  quot = spu_add(quot, spu_rlmaska(quot,-16));
  vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1));
  quot = spu_add(quot, spu_rlmaska(spu_add(spu_add(r,5), spu_sl (r,2)), -4));
  return quot;
}

vector float
cbrtf4 (vector float x)
#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3)
#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)

static inline vector float
_cbrtf4 (vector float x)
{
  vec_float4 zeros = spu_splats(0.0f);
  vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
  vec_int4 xexp, n;
  vec_uint4 zeromask = spu_cmpeq(x, zeros);
  vec_int4 xexp;
  vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
  vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
  vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
  x = spu_and(x, sgnmask);

  x = frexpf4(x, &xexp);
  x = _frexpf4(x, &xexp);
  vec_float4 p = spu_madd(
    spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
    x,
    spu_splats(0.492659620528969547f)
  );
  vec_float4 p3 = spu_mul(p, spu_mul(p, p));
  vec_int4 quot;
  __calcQuot(xexp);
  vec_int4 quot = __cbrtf4_calc_quot(xexp);
  vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
  vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
  factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
  vec_float4 factor = spu_splats((float)(1.0/__CBRTF_sqr_cbrt2));
  factor = spu_sel(factor, spu_splats((float)(1.0/__CBRTF_cbrt2)), spu_cmpeq(modval,-1));
  factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
  factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
  factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
  factor = spu_sel(factor, spu_splats((float)( __CBRTF_cbrt2)), spu_cmpeq(modval, 1));
  factor = spu_sel(factor, spu_splats((float)(__CBRTF_sqr_cbrt2)), spu_cmpeq(modval, 2));

  vec_float4 pre = spu_mul(p, factor);
  vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
  vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
  vec_float4 res = spu_mul(pre, divf4(numr, denr));
  res = ldexpf4(res, quot);
  vec_float4 res = spu_mul(pre, _divf4(numr, denr));
  res = _ldexpf4(res, quot);

  return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
                 zeros,
                 zeromask);
}

/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
  vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
  vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
  vec_float4 absx = spu_andc(x, sbit);
  vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
  res = spu_sel(res, spu_or(sbit, res), neg);
  return res;
}
*/
#endif
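__cbrtf4_calc_quot divides the binary exponent by 3 without an integer divide: it approximates n/3 with the shift-add series n*(1/4 + 1/16 + ...) and then corrects with the residual. The same computation in scalar form, as a sketch:

/* Scalar version of the exponent/3 computation in __cbrtf4_calc_quot. */
static int div3_shift_add(int n)
{
    if (n < 0) n += 2;              /* bias so the flooring shifts truncate toward zero */
    int quot = (n >> 2) + (n >> 4); /* n * 0.0101...b, a first estimate of n/3 */
    quot += quot >> 4;              /* extend the repeating bit pattern */
    quot += quot >> 8;
    quot += quot >> 16;
    int r = n - 3 * quot;           /* residual error of the estimate */
    quot += (5 * r + 5) >> 4;       /* final correction, as in the vector code */
    return quot;
}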
@@ -27,11 +27,14 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_CEILD2_H___
#define ___SIMD_MATH_CEILD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
ceild2(vector double in)
static inline vector double
_ceild2(vector double in)
{
  vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
  vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -83,7 +86,7 @@ ceild2(vector double in)
  insert = spu_andc(spu_andc(e_sign, e_00), exp_ge0);

  /* replace insert
   */
  in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));

  /* in + addend
@@ -92,3 +95,5 @@ ceild2(vector double in)

  return (out);
}

#endif
@@ -27,28 +27,32 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_CEILF4_H___
#define ___SIMD_MATH_CEILF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
ceilf4 (vector float x)
static inline vector float
_ceilf4 (vector float x)
{
  vec_int4 xi, xi1;
  vec_uint4 inrange;
  vec_float4 truncated, truncated1;

  // Find truncated value and one greater.

  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );

  xi = spu_convts( x, 0 );
  xi1 = spu_add( xi, 1 );

  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );

  // If truncated value is less than input, add one.

  return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}

#endif
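The constant 0x4b000000 is the bit pattern of 2^23: single-precision floats at or beyond that magnitude are already integral, so they pass through unchanged. A scalar sketch of the ceil-by-truncation used above:

/* Scalar form of _ceilf4's truncate-and-adjust. */
static float ceil_sketch(float x)
{
    if (x >= 8388608.0f || x <= -8388608.0f)  /* |x| >= 2^23: already an integer */
        return x;
    float t = (float)(int)x;                  /* truncate toward zero */
    return (t < x) ? t + 1.0f : t;            /* bump up if truncation went below x */
}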
@@ -27,13 +27,17 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_COPYSIGND2_H___
#define ___SIMD_MATH_COPYSIGND2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double copysignd2 (vector double x, vector double y)
static inline vector double
_copysignd2 (vector double x, vector double y)
{
  return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}

#endif
@@ -27,13 +27,17 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_COPYSIGNF4_H___
#define ___SIMD_MATH_COPYSIGNF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
copysignf4 (vector float x, vector float y)
static inline vector float
_copysignf4 (vector float x, vector float y)
{
  return spu_sel( x, y, spu_splats(0x80000000) );
}

#endif
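spu_sel with a sign-bit mask takes every bit of x except the top bit of each slot, which comes from y. The scalar bit-level equivalent, for reference:

#include <stdint.h>
#include <string.h>

/* Scalar restatement of _copysignf4's select. */
static float copysign_sketch(float x, float y)
{
    uint32_t xi, yi, ri;
    memcpy(&xi, &x, sizeof xi);
    memcpy(&yi, &y, sizeof yi);
    ri = (xi & 0x7fffffffu) | (yi & 0x80000000u);  /* magnitude from x, sign from y */
    float r;
    memcpy(&r, &ri, sizeof r);
    return r;
}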
46
Extras/simdmathlibrary/spu/simdmath/cosd2.h
Normal file
@@ -0,0 +1,46 @@
/* cosd2 - Computes the cosine of each of two double slots.
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the Sony Computer Entertainment Inc nor the names
      of its contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_COSD2_H___
#define ___SIMD_MATH_COSD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/sincosd2.h>

static inline vector double
_cosd2 (vector double x)
{
  vec_double2 s, c;
  _sincosd2(x, &s, &c);
  return c;
}

#endif
46
Extras/simdmathlibrary/spu/simdmath/cosf4.h
Normal file
@@ -0,0 +1,46 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.
   [BSD license text identical to cosd2.h above]
 */

#ifndef ___SIMD_MATH_COSF4_H___
#define ___SIMD_MATH_COSF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/sincosf4.h>

static inline vector float
_cosf4 (vector float x)
{
  vec_float4 s, c;
  _sincosf4(x, &s, &c);
  return c;
}

#endif
@@ -27,15 +27,21 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_DIVD2_H___
#define ___SIMD_MATH_DIVD2_H___

// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
divd2 (vector double numer, vector double denom)
#include <simdmath/recipd2.h>

static inline vector double
_divd2 (vector double numer, vector double denom)
{
  return spu_mul( numer, recipd2( denom ) );
  return spu_mul( numer, _recipd2( denom ) );
}

#endif
@@ -27,20 +27,24 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_DIVF4_H___
#define ___SIMD_MATH_DIVF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
divf4 (vector float numer, vector float denom)
static inline vector float
_divf4 (vector float numer, vector float denom)
{
  // Reciprocal estimate and 1 Newton-Raphson iteration.
  // Uses constant of 1.0 + 1 ulp to improve accuracy.

  vector float y0, y0numer;
  vector float oneish = (vector float)spu_splats(0x3f800001);

  y0 = spu_re( denom );
  y0numer = spu_mul( numer, y0 );
  return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}

#endif
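The refinement is the standard one-step Newton-Raphson division: with a low-precision estimate y0 ~= 1/d and q0 = n*y0, the improved quotient is q0 + (1 - d*y0)*q0; the "oneish" constant of 1.0 plus one ulp nudges that residual to compensate for the truncation of the hardware estimate. A scalar sketch (rough_recip is a hypothetical stand-in for spu_re):

/* Scalar sketch of the estimate-and-refine division in _divf4. */
static float rough_recip(float d) { return 1.0f / d; } /* placeholder for the HW estimate */

static float div_newton(float n, float d)
{
    float y0 = rough_recip(d);   /* low-precision reciprocal estimate */
    float q0 = n * y0;           /* rough quotient */
    float e  = 1.0f - d * y0;    /* residual error of the estimate */
    return q0 + e * q0;          /* refined quotient: q0 * (2 - d*y0) */
}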
67
Extras/simdmathlibrary/spu/simdmath/divi4.h
Normal file
@@ -0,0 +1,67 @@
/* divi4 -
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.
   [BSD license text identical to cosd2.h above]
 */

#ifndef ___SIMD_MATH_DIVI4_H___
#define ___SIMD_MATH_DIVI4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/divu4.h>

// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.

static inline divi4_t
_divi4 (vector signed int numer, vector signed int denom)
{
  divu4_t resAbs;
  divi4_t res;
  vec_uint4 numerPos, denomPos, quotNeg;
  vec_uint4 numerAbs, denomAbs;

  // Determine whether result needs sign change

  numerPos = spu_cmpgt( numer, -1 );
  denomPos = spu_cmpgt( denom, -1 );
  quotNeg = spu_xor( numerPos, denomPos );

  // Use absolute values of numerator, denominator

  numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
  denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );

  resAbs = _divu4(numerAbs, denomAbs);

  res.quot = spu_sel( (vec_int4)resAbs.quot, spu_sub( 0, (vec_int4)resAbs.quot ), quotNeg );
  res.rem = spu_sel( spu_sub( 0, (vec_int4)resAbs.rem ), (vec_int4)resAbs.rem, numerPos );
  return res;
}

#endif
@@ -27,44 +27,48 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_DIVU4_H___
#define ___SIMD_MATH_DIVU4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.

divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
static inline divu4_t
_divu4 (vector unsigned int numer, vector unsigned int denom)
{
  divu4_t res;
  vec_int4 shift;
  vec_uint4 quot, newQuot;
  vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
  vec_uint4 newNum, skip, cont;
  int anyCont;

  // Get difference of leading zeros.
  // Any possible negative value will be interpreted as a shift > 31

  denomZeros = spu_cntlz( denom );
  numerZeros = spu_cntlz( numer );

  shift = (vec_int4)spu_sub( denomZeros, numerZeros );

  // Shift denom to align leading one with numerator's

  denomShifted = spu_sl( denom, (vec_uint4)shift );
  oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
  oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );

  // Shift left all leading zeros.

  denomLeft = spu_sl( denom, denomZeros );
  oneLeft = spu_sl( spu_splats(1U), denomZeros );

  quot = spu_splats(0U);

  do
  {
    cont = spu_cmpgt( oneShifted, 0U );
    anyCont = spu_extract( spu_gather( cont ), 0 );

@@ -87,11 +91,12 @@ divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)

    quot = spu_sel( newQuot, quot, skip );
    numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
  }
  while ( anyCont );

  res.quot = quot;
  res.rem = numer;
  return res;
}

#endif
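A small usage sketch of the quotient/remainder structs returned above (field names quot and rem as declared; the function and variable names here are illustrative only):

#include <spu_intrinsics.h>
#include <simdmath/divi4.h>

/* Split four packet lengths into 16-byte blocks plus a tail. */
static void split_blocks(vec_int4 lengths, vec_int4 *blocks, vec_int4 *tail)
{
    divi4_t d = _divi4(lengths, spu_splats(16));
    *blocks = d.quot;   /* lengths / 16, truncated toward zero */
    *tail   = d.rem;    /* lengths % 16, sign follows the numerator */
}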
@@ -27,6 +27,8 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_EXP2F4_H___
#define ___SIMD_MATH_EXP2F4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -72,10 +74,10 @@
 */

#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
#define __EXP2F_LN2 0.69314718055995f /* ln(2) */

vector float
exp2f4 (vector float x)
static inline vector float
_exp2f4 (vector float x)
{
  vec_int4 ix;
  vec_uint4 overflow, underflow;
@@ -91,7 +93,7 @@ exp2f4 (vector float x)
  bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
  ix = spu_convts(spu_add(x, bias), 0);
  frac = spu_sub(spu_convtf(ix, 0), x);
  frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
  frac = spu_mul(frac, spu_splats(__EXP2F_LN2));

  // !!! HRD Replacing unclear and incorrect overflow handling code
  //overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
@@ -99,7 +101,7 @@ exp2f4 (vector float x)
  underflow = spu_cmpgt(spu_splats(-126.0f), x);

  //exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
                                                          // !!! overflow (x >= 127.999999f)
  exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
  exp_int = spu_add(exp_int, exp_int); // !!! HRD
@@ -123,9 +125,11 @@ exp2f4 (vector float x)
  result = spu_mul(exp_frac, exp_int);

  /* Handle overflow */
  result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
  result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
  result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), overflow);
  result = spu_sel(result, (vec_float4)spu_splats(0), underflow);
  //result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));

  return (result);
}

#endif
@@ -27,37 +27,44 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_EXPF4_H___
#define ___SIMD_MATH_EXPF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#include <simdmath/divf4.h>
#include <simdmath/ldexpf4.h>

#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
#define __EXPF_C1 -0.6931470632553101f
#define __EXPF_C2 -1.1730463525082e-7f

vector float
expf4 (vector float x)
#define __EXPF_INVLN2 1.4426950408889634f

static inline vector float
_expf4 (vector float x)
{
  vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
  vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
  vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
  vec_uint4 xnegmask = spu_cmpgt(spu_splats(0.0f), x);
  vec_float4 goffset = spu_sel(spu_splats(0.5f),spu_splats(-0.5f),xnegmask);
  vec_float4 g = spu_mul(x, spu_splats(__EXPF_INVLN2));
  vec_int4 xexp = spu_convts(spu_add(g, goffset),0);

  g = spu_convtf(xexp, 0);
  g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
  g = spu_madd(g, spu_splats(__EXPF_C2), spu_madd(g, spu_splats(__EXPF_C1), x));
  vec_float4 z = spu_mul(g, g);
  vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f));
  vec_float4 a = spu_mul(z, spu_splats(0.0999748594f));
  vec_float4 b = spu_mul(g,
                         spu_madd(z,
                                  spu_splats((float)0.0083208258f),
                                  spu_splats((float)0.4999999992f)
                                  spu_splats(0.0083208258f),
                                  spu_splats(0.4999999992f)
                                  )
                         );

  vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
                         spu_add(spu_splats(1.0f), spu_sub(a, b)));
  vec_float4 foo = _divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
                          spu_add(spu_splats(1.0f), spu_sub(a, b)));

  return ldexpf4(foo, xexp);
  return _ldexpf4(foo, xexp);

}

#endif
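expf4 evaluates e^x as 2^k * e^g, choosing k = round(x / ln 2) so the residual g = x - k*ln 2 stays small; C1 and C2 split -ln 2 into a high part and its low-order bits so the subtraction loses almost no precision. A scalar sketch of that splitting:

#include <math.h>

/* Scalar sketch of the e^x = 2^k * e^g decomposition used by _expf4. */
static float exp_sketch(float x)
{
    const float C1 = -0.6931470632553101f;  /* high part of -ln(2) */
    const float C2 = -1.1730463525082e-7f;  /* low-order bits of -ln(2) */
    float k = (float)(int)(x * 1.4426950408889634f + (x < 0.0f ? -0.5f : 0.5f));
    float g = (k * C1 + x) + k * C2;        /* residual, |g| <= ln(2)/2 roughly */
    return ldexpf(expf(g), (int)k);         /* e^g scaled by 2^k */
}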
@@ -27,28 +27,36 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_EXPMLF4_H___
#define ___SIMD_MATH_EXPMLF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
#include <simdmath/expf4.h>
#include <simdmath/divf4.h>

vector float
expm1f4 (vector float x)
#define __EXPM1F_ln1by2 -0.6931471805599f
#define __EXPM1F_ln3by2 0.4054651081082f

static inline vector float
_expm1f4 (vector float x)
{
  vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
                                                  spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
  vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(__EXPM1F_ln1by2)),
                                   spu_cmpgt(spu_splats(__EXPM1F_ln3by2), x));
  vec_float4 x2 = spu_mul(x,x);
  vec_float4 d0, d1, n0, n1;

  d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
  d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
  d0 = spu_madd(x , spu_splats(-0.3203561199f), spu_splats(0.9483177697f));
  d1 = spu_madd(x2, spu_splats(0.0326527809f), d0);

  n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
  n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
  n0 = spu_madd(x , spu_splats(0.1538026623f), spu_splats(0.9483177732f));
  n1 = spu_madd(x , spu_splats(0.0024490478f), spu_splats(0.0305274668f));
  n1 = spu_madd(x2, n1, n0);

  return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
                 spu_mul(x, divf4(n1, d1)),
  return spu_sel(spu_sub(_expf4(x), spu_splats(1.0f)),
                 spu_mul(x, _divf4(n1, d1)),
                 nearzeromask);
}

#endif
@@ -27,11 +27,16 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FABSD2_H___
#define ___SIMD_MATH_FABSD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double fabsd2 (vector double x)
static inline vector double
_fabsd2 (vector double x)
{
  return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}

#endif
@@ -27,11 +27,16 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FABSF4_H___
#define ___SIMD_MATH_FABSF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float fabsf4 (vector float x)
static inline vector float
_fabsf4 (vector float x)
{
  return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}

#endif
@@ -27,13 +27,16 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FDIMD2_H___
#define ___SIMD_MATH_FDIMD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

/* fdim_v - compute the positive difference of x and y.
 */
vector double
fdimd2 (vector double x, vector double y)
static inline vector double
_fdimd2 (vector double x, vector double y)
{
  vec_double2 v;
  vec_uint4 mask;
@@ -44,3 +47,5 @@ fdimd2 (vector double x, vector double y)

  return (v);
}

#endif
@@ -27,12 +27,17 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FDIMF4_H___
#define ___SIMD_MATH_FDIMF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
fdimf4 (vector float x, vector float y)
static inline vector float
_fdimf4 (vector float x, vector float y)
{
  vec_float4 diff = spu_sub(x,y);
  return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}

#endif
@@ -27,11 +27,14 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FLOORD2_H___
#define ___SIMD_MATH_FLOORD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
floord2(vector double in)
static inline vector double
_floord2(vector double in)
{
  vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
  vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
@@ -74,7 +77,7 @@ floord2(vector double in)
  equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
  addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));

  /* insert
   */
  e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
  e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
@@ -92,3 +95,5 @@ floord2(vector double in)

  return (out);
}

#endif
@@ -27,28 +27,32 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FLOORF4_H___
#define ___SIMD_MATH_FLOORF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
floorf4 (vector float x)
static inline vector float
_floorf4 (vector float x)
{
  vec_int4 xi, xi1;
  vec_uint4 inrange;
  vec_float4 truncated, truncated1;

  // Find truncated value and one less.

  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );

  xi = spu_convts( x, 0 );
  xi1 = spu_add( xi, -1 );

  truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
  truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );

  // If truncated value is greater than input, subtract one.

  return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}

#endif
@@ -27,11 +27,16 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMAD2_H___
#define ___SIMD_MATH_FMAD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
fmad2 (vector double x, vector double y, vector double z)
static inline vector double
_fmad2 (vector double x, vector double y, vector double z)
{
  return spu_madd(x,y,z);
}

#endif
@@ -27,12 +27,16 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMAF4_H___
#define ___SIMD_MATH_FMAF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
fmaf4 (vector float x, vector float y, vector float z)
static inline vector float
_fmaf4 (vector float x, vector float y, vector float z)
{
  return spu_madd(x,y,z);
}

#endif
@@ -27,6 +27,8 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMAXD2_H___
#define ___SIMD_MATH_FMAXD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
@@ -36,8 +38,8 @@
 * is returned.
 */

vector double
fmaxd2 (vector double x, vector double y)
static inline vector double
_fmaxd2 (vector double x, vector double y)
{
  vec_ullong2 selector, denorm;
  vec_double2 x_offset, y_offset, diff;
@@ -66,3 +68,4 @@ fmaxd2 (vector double x, vector double y)
  return spu_sel(x, y, selector);
}

#endif
@@ -27,14 +27,17 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMAXF4_H___
#define ___SIMD_MATH_FMAXF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
fmaxf4 (vector float x, vector float y)
static inline vector float
_fmaxf4 (vector float x, vector float y)
{
  return spu_sel( x, y, spu_cmpgt( y, x ) );
}

#endif
@@ -27,6 +27,9 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMIND2_H___
#define ___SIMD_MATH_FMIND2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

@@ -35,8 +38,8 @@
 * is returned.
 */

vector double
fmind2 (vector double x, vector double y)
static inline vector double
_fmind2 (vector double x, vector double y)
{
  vec_ullong2 selector, denorm;
  vec_double2 x_offset, y_offset, diff;
@@ -65,3 +68,4 @@ fmind2 (vector double x, vector double y)
  return spu_sel(x, y, selector);
}

#endif
@@ -27,14 +27,17 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMINF4_H___
#define ___SIMD_MATH_FMINF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
fminf4 (vector float x, vector float y)
static inline vector float
_fminf4 (vector float x, vector float y)
{
  return spu_sel( x, y, spu_cmpgt( x, y ) );
}

#endif
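Both single-precision variants reduce to one compare and one select per slot; the operand order keeps x when the two values compare equal. Scalar equivalents, for reference:

/* Scalar forms of the compare-and-select in _fminf4 / _fmaxf4. */
static float fmin_sketch(float x, float y) { return (x > y) ? y : x; }
static float fmax_sketch(float x, float y) { return (y > x) ? y : x; }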
@@ -27,10 +27,14 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMODD2_H___
#define ___SIMD_MATH_FMODD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/_vec_utils.h>

/*
 * a vector is returned that contains the remainder of xi/yi,
 * for corresponding elements of vector double x and vector double y,
@@ -41,11 +45,8 @@
 * magnitude less than |yi|
 */

static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);

vector double fmodd2(vector double x, vector double y)
static inline vector double
_fmodd2(vector double x, vector double y)
{
  int shift0, shift1;
  vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
@@ -82,20 +83,20 @@ vector double fmodd2(vector double x, vector double y)
  exp_y = spu_rlmask(y_hi, -20);

  // y>x
  resultx = _vec_gt64(abs_y, abs_x);
  resultx = __vec_gt64(abs_y, abs_x);

  //is Inf, is NaN
  x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
  x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
  x_inf = __vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
  x_nan = spu_andc(x_7ff, x_inf);

  y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
  y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
  y_inf = __vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
  y_nan = spu_andc(y_7ff, y_inf);

  // is zero
  zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
  zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
  zero_x = __vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
  zero_y = __vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));

  /* Determine ilogb of abs_x and abs_y and
@@ -121,8 +122,8 @@ vector double fmodd2(vector double x, vector double y)
  cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);

  /*
  mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
  mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
  */
  //norm
  mant_x_norm = spu_or(implied_1, frac_x);
@@ -225,8 +226,8 @@ vector double fmodd2(vector double x, vector double y)
  shift0 = spu_extract(cnt, 0);
  shift1 = spu_extract(cnt, 2);
  /*
  norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
  norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
  */
  norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
  norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
@@ -236,11 +237,11 @@ vector double fmodd2(vector double x, vector double y)

  //denorm
  /*
  shift = spu_add((vec_int4)exp_y, -1);
  shift0 = spu_extract(shift, 0);
  shift1 = spu_extract(shift, 2);
  denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
  denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
  */
  shift = spu_add(power, -1);
  shift0 = spu_extract(shift, 0);
@@ -278,25 +279,4 @@ vector double fmodd2(vector double x, vector double y)
  return ((vec_double2)result);
}

/*
 * extend spu_cmpgt function to 64bit data
 */
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
  vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
  return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // only the high word is valid
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // only the high word is valid
  return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}

static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
  vec_uint4 eq = spu_cmpeq(aa, bb);
  return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}

#endif
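The deleted _vec_gt64_half helper (now provided by _vec_utils.h) builds a 64-bit unsigned greater-than out of 32-bit compares: the high words decide unless they are equal, in which case the low-word compare is promoted. The same logic in scalar form:

#include <stdint.h>

/* Scalar restatement of the 64-bit compare composed in _vec_gt64_half. */
static int gt64(uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo)
{
    if (a_hi != b_hi)
        return a_hi > b_hi;   /* high words differ: they decide */
    return a_lo > b_lo;       /* tie: the low-word compare breaks it */
}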
@@ -27,60 +27,68 @@
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_FMODF4_H___
#define ___SIMD_MATH_FMODF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>

//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.

vector float
fmodf4 (vector float x, vector float y)
static inline vector float
_fmodf4 (vector float x, vector float y)
{
  vec_float4 q, xabs, yabs, qabs, xabs2;
  vec_int4 qi0, qi1, qi2;
  vec_float4 i0, i1, i2, r1, r2, i;
  vec_uint4 inrange;

  // Find i = truncated_integer(|x/y|)

  // If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
  // Thus i is either the truncated quotient, one less, or one greater.

  q = divf4( x, y );
  xabs = fabsf4( x );
  yabs = fabsf4( y );
  qabs = fabsf4( q );
  q = _divf4( x, y );
  xabs = _fabsf4( x );
  yabs = _fabsf4( y );
  qabs = _fabsf4( q );
  xabs2 = spu_add( xabs, xabs );

  inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
  inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );

  qi1 = spu_convts( qabs, 0 );
  qi0 = spu_add( qi1, -1 );
  qi2 = spu_add( qi1, 1 );

  i0 = spu_convtf( qi0, 0 );
  i1 = spu_convtf( qi1, 0 );
  i2 = spu_convtf( qi2, 0 );

  // Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
  // 2*|x| - i*|y| >= |x|:
  //
  // With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
  // Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
  // answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.

  r1 = spu_nmsub( i1, yabs, xabs2 );
  r2 = spu_nmsub( i2, yabs, xabs2 );

  i = i0;
  i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
  i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );

  i = copysignf4( i, q );
  i = _copysignf4( i, q );

  return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}

#endif
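The three candidates i-1, i, i+1 absorb the up-to-1.0 error in the estimated quotient; the chosen i is the largest with |x| - i*|y| >= 0, and the remainder is then x - i*y. A scalar sketch of the same selection:

#include <math.h>

/* Scalar sketch of _fmodf4's candidate selection. */
static float fmod_sketch(float x, float y)
{
    float q  = x / y;                    /* approximate quotient */
    float qi = truncf(fabsf(q));         /* middle candidate */
    /* Pick the largest k in {qi-1, qi, qi+1} with |x| - k*|y| >= 0. */
    float k = qi - 1.0f;
    if (fabsf(x) - qi * fabsf(y) >= 0.0f)          k = qi;
    if (fabsf(x) - (qi + 1.0f) * fabsf(y) >= 0.0f) k = qi + 1.0f;
    k = copysignf(k, q);                 /* quotient keeps its own sign */
    return x - k * y;                    /* remainder, sign follows x */
}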
83
Extras/simdmathlibrary/spu/simdmath/fpclassifyd2.h
Normal file
@@ -0,0 +1,83 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.
   [BSD license text identical to cosd2.h above]
 */

#ifndef ___SIMD_MATH_FPCLASSIFYD2_H___
#define ___SIMD_MATH_FPCLASSIFYD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>

static inline vector signed long long
_fpclassifyd2 (vector double x)
{
  vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
  vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
  vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };

  vec_ullong2 sign = spu_splats(0x8000000000000000ull);
  vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
  vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
  vec_ullong2 zero = spu_splats(0x0000000000000000ull);

  vec_ullong2 mask;
  vec_llong2 classtype;
  vec_uint4 cmpgt, cmpeq;

  //FP_NORMAL: normal unless nan, infinity, zero, or denorm
  classtype = spu_splats((long long)FP_NORMAL);

  //FP_NAN: all-ones exponent and non-zero mantissa
  cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
  cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
  mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
                              spu_and( spu_shuffle( cmpeq, cmpeq, even ),
                                       spu_shuffle( cmpgt, cmpgt, odd ) ) );
  classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );

  //FP_INFINITE: all-ones exponent and zero mantissa
  mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
  classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );

  //FP_ZERO: zero exponent and zero mantissa
  cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
  mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
  classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );

  //FP_SUBNORMAL: zero exponent and non-zero mantissa
  cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
  cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
  mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
                               spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
  classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );

  return classtype;
}

#endif
@@ -27,52 +27,41 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_FPCLASSIFYF4_H___
|
||||
#define ___SIMD_MATH_FPCLASSIFYF4_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef FP_NAN
|
||||
#define FP_NAN (0)
|
||||
#endif
|
||||
#ifndef FP_INFINITE
|
||||
#define FP_INFINITE (1)
|
||||
#endif
|
||||
#ifndef FP_ZERO
|
||||
#define FP_ZERO (2)
|
||||
#endif
|
||||
#ifndef FP_SUBNORMAL
|
||||
#define FP_SUBNORMAL (3)
|
||||
#endif
|
||||
#ifndef FP_NORMAL
|
||||
#define FP_NORMAL (4)
|
||||
#endif
|
||||
|
||||
vector signed int
|
||||
fpclassifyf4 (vector float x)
|
||||
static inline vector signed int
|
||||
_fpclassifyf4 (vector float x)
|
||||
{
|
||||
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
|
||||
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
|
||||
|
||||
vec_uint4 mask;
|
||||
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
|
||||
vec_int4 classtype = (vec_int4)zero;
|
||||
vec_uint4 mask;
|
||||
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
|
||||
vec_int4 classtype = (vec_int4)zero;
|
||||
|
||||
//FP_NAN: NaN not supported on SPU, never return FP_NAN
|
||||
//FP_NAN: NaN not supported on SPU, never return FP_NAN
|
||||
|
||||
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
|
||||
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
|
||||
|
||||
//FP_ZERO: zero exponent and zero mantissa
|
||||
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
|
||||
unclassified = spu_andc( unclassified, mask );
|
||||
//FP_ZERO: zero exponent and zero mantissa
|
||||
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
|
||||
unclassified = spu_andc( unclassified, mask );
|
||||
|
||||
//FP_SUBNORMAL: zero exponent and non-zero mantissa
|
||||
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
|
||||
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
|
||||
unclassified = spu_andc( unclassified, mask );
|
||||
//FP_SUBNORMAL: zero exponent and non-zero mantissa
|
||||
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
|
||||
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
|
||||
unclassified = spu_andc( unclassified, mask );
|
||||
|
||||
//FP_NORMAL: none of the above
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
|
||||
//FP_NORMAL: none of the above
|
||||
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
|
||||
|
||||
return classtype;
|
||||
return classtype;
|
||||
}
|
||||
|
||||
#endif
Extras/simdmathlibrary/spu/simdmath/frexpd2.h
@@ -0,0 +1,98 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.

Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_FREXPD2_H___
#define ___SIMD_MATH_FREXPD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>

#define __FREXPD_DBL_NAN 0x7FF8000000000000ull

static inline vector double
_frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };

vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};

vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);

vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;

//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats(__FREXPD_DBL_NAN), isnan );

//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );

//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );

frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );

//Integer Exponent: if x is normal or subnormal

//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );

x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );

e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );

frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );

return frac;
}

#endif
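
A hypothetical usage sketch of the new _frexpd2 (the demo function name is invented): each slot is split into a fraction in [0.5, 1.0) and a power-of-two exponent, so that x equals frac * 2^exp.

  #include <simdmath/frexpd2.h>

  static void frexpd2_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector double x = { 8.0, 0.5 };
    vector signed long long e = spu_splats(0LL);
    vector double f = _frexpd2(x, &e);
    /* f = { 0.5, 0.5 }, e = { 4, 0 }: 8.0 = 0.5 * 2^4 and 0.5 = 0.5 * 2^0 */
    (void)f;
  }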
@@ -27,21 +27,26 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_FREXPF4_H___
#define ___SIMD_MATH_FREXPF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
frexpf4 (vector float x, vector signed int *pexp)
static inline vector float
_frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);

vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, expmask);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);

vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);

return spu_sel(m2, (vec_float4)zeros, zeromask);
}

#endif
@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_HYPOTD2_H___
#define ___SIMD_MATH_HYPOTD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
hypotd2 (vector double x, vector double y)
#include <simdmath/sqrtd2.h>

static inline vector double
_hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);

return sqrtd2(sum);
return _sqrtd2(sum);
}

#endif
@@ -27,14 +27,21 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_HYPOTF4_H___
#define ___SIMD_MATH_HYPOTF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
hypotf4 (vector float x, vector float y)
#include <simdmath/sqrtf4.h>

static inline vector float
_hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);

return sqrtf4(sum);
return _sqrtf4(sum);
}
#endif
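
As a quick sanity check, a hypothetical call of the new _hypotf4 on a 3-4-5 triangle in every slot; note that the straightforward x*x + y*y formulation can overflow for inputs near FLT_MAX, the usual trade-off of this fast form.

  #include <simdmath/hypotf4.h>

  static void hypotf4_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector float x = spu_splats(3.0f);
    vector float y = spu_splats(4.0f);
    vector float h = _hypotf4(x, y);  /* all four slots hold 5.0f */
    (void)h;
  }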
Extras/simdmathlibrary/spu/simdmath/ilogbd2.h
@@ -0,0 +1,83 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.

Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ILOGBD2_H___
#define ___SIMD_MATH_ILOGBD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>

static inline vector signed long long
_ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };

vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);

vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;

//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );

//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );

//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );

lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );

ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );

return ilogb;
}

#endif
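
A hypothetical usage sketch of _ilogbd2: it returns the unbiased exponent per slot, with FP_ILOGB0 / FP_ILOGBNAN covering the special cases handled above.

  #include <simdmath/ilogbd2.h>

  static void ilogbd2_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector double x = { 1024.0, 0.75 };
    vector signed long long e = _ilogbd2(x);
    /* e = { 10, -1 }: 1024 = 2^10, 0.75 = 1.5 * 2^-1 */
    (void)e;
  }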
@@ -27,22 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ILOGBF4_H___
#define ___SIMD_MATH_ILOGBF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>

#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif

vector signed int
ilogbf4 (vector float x)
static inline vector signed int
_ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);

vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_uint4 zeromask = spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);

return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}

#endif
@@ -30,10 +30,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_IRINTF4_H___
#define ___SIMD_MATH_IRINTF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector signed int irintf4(vector float in)
static inline vector signed int
_irintf4(vector float in)
{
return spu_convts(in,0);
}

#endif
@@ -29,10 +29,14 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_IROUNDF4_H___
#define ___SIMD_MATH_IROUNDF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector signed int iroundf4(vector float in)
static inline vector signed int
_iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
@@ -53,3 +57,5 @@ vector signed int iroundf4(vector float in)

return (out);
}

#endif
@@ -27,20 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_IS0DENORMD2_H___
#define ___SIMD_MATH_IS0DENORMD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>


vector unsigned long long
is0denormd2 (vector double x)
static inline vector unsigned long long
_is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };

xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );

return cmp;
}

#endif
@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_IS0DENORMF4_H___
#define ___SIMD_MATH_IS0DENORMF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
is0denormf4 (vector float x)
static inline vector unsigned int
_is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}

#endif
@@ -27,28 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISEQUALD2_H___
#define ___SIMD_MATH_ISEQUALD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
isequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;

cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( isnand2( x ), isnand2( y ) ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( _isnand2( x ), _isnand2( y ) ) );
}

#endif
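
The bothzero logic above is what lets -0.0 compare equal to +0.0 even though the underlying comparison is bitwise; a hypothetical spot check:

  #include <simdmath/isequald2.h>

  static void isequald2_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector double x = { -0.0, 1.0 };
    vector double y = {  0.0, 2.0 };
    vector unsigned long long m = _isequald2(x, y);
    /* m = { all-ones, all-zeros }: -0.0 == +0.0, while 1.0 != 2.0 */
    (void)m;
  }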
@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISEQUALF4_H___
#define ___SIMD_MATH_ISEQUALF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
isequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
}

#endif
@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISFINITED2_H___
#define ___SIMD_MATH_ISFINITED2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
isfinited2 (vector double x)
static inline vector unsigned long long
_isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;

//Finite unless NaN or Inf, check for 'not all-ones exponent'

cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);

return cmpr;
}

#endif
@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISFINITEF4_H___
#define ___SIMD_MATH_ISFINITEF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
isfinitef4 (vector float x)
static inline vector unsigned int
_isfinitef4 (vector float x)
{
(void)x;

// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}

#endif
@@ -27,39 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISGREATERD2_H___
#define ___SIMD_MATH_ISGREATERD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
isgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;

cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );

return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}

#endif
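
The bothneg select above flips the integer comparison for two negative doubles, whose IEEE bit patterns order in reverse; a hypothetical spot check:

  #include <simdmath/isgreaterd2.h>

  static void isgreaterd2_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector double x = { -1.0, -1.0 };
    vector double y = { -2.0,  2.0 };
    vector unsigned long long m = _isgreaterd2(x, y);
    /* m = { all-ones, all-zeros }: -1.0 > -2.0 even though its bit pattern is smaller */
    (void)m;
  }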
@@ -27,41 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISGREATEREQUALD2_H___
#define ___SIMD_MATH_ISGREATEREQUALD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
isgreaterequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;

cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

cmpeq_ll = spu_or( cmpeq_ll, bothzero);

bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );

return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}

#endif
@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISGREATEREQUALF4_H___
#define ___SIMD_MATH_ISGREATEREQUALF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
isgreaterequalf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;

var = spu_cmpgt(y, x);

return spu_nor(var, var);
}

#endif
@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISGREATERF4_H___
#define ___SIMD_MATH_ISGREATERF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
isgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
}

#endif
@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISINFD2_H___
#define ___SIMD_MATH_ISINFD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>


vector unsigned long long
isinfd2 (vector double x)
static inline vector unsigned long long
_isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;

xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );

return cmp;
}

#endif
@@ -27,14 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISINFF4_H___
#define ___SIMD_MATH_ISINFF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
isinff4 (vector float x)
static inline vector unsigned int
_isinff4 (vector float x)
{
(void)x;

// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

#endif
@@ -27,38 +27,45 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSD2_H___
#define ___SIMD_MATH_ISLESSD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
islessd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;

cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );

return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}

#endif
@@ -27,40 +27,47 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSEQUALD2_H___
#define ___SIMD_MATH_ISLESSEQUALD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
islessequald2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;

cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

cmpeq_ll = spu_or( cmpeq_ll, bothzero);

bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );

return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}

#endif
@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSEQUALF4_H___
#define ___SIMD_MATH_ISLESSEQUALF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
islessequalf4 (vector float x, vector float y)
static inline vector unsigned int
_islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;

var = spu_cmpgt(x, y);

return spu_nor(var, var);
}

#endif
@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSF4_H___
#define ___SIMD_MATH_ISLESSF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
islessf4 (vector float x, vector float y)
static inline vector unsigned int
_islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}

#endif
@@ -27,29 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSGREATERD2_H___
#define ___SIMD_MATH_ISLESSGREATERD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
islessgreaterd2 (vector double x, vector double y)
#include <simdmath/isnand2.h>

static inline vector unsigned long long
_islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;

cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );

cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );

bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );

return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}

#endif
@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISLESSGREATERF4_H___
#define ___SIMD_MATH_ISLESSGREATERF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned int
islessgreaterf4 (vector float x, vector float y)
static inline vector unsigned int
_islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;

var = spu_cmpeq(x, y);

return spu_nor(var, var);
}

#endif
@@ -27,26 +27,30 @@
POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ___SIMD_MATH_ISNAND2_H___
#define ___SIMD_MATH_ISNAND2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector unsigned long long
isnand2 (vector double x)
static inline vector unsigned long long
_isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);

xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );

cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );

return cmpnan;
}

#endif
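
Unlike the float variants, SPU doubles do carry NaNs, so _isnand2 does real work; a hypothetical check built from a quiet-NaN bit pattern:

  #include <simdmath/isnand2.h>

  static void isnand2_demo(void)   /* hypothetical helper, not in the patch */
  {
    vector double x = (vector double)(vector unsigned long long)
                      { 0x7ff8000000000000ull, 0x3ff0000000000000ull };
    vector unsigned long long m = _isnand2(x);
    /* m = { all-ones, all-zeros }: slot 0 is a quiet NaN, slot 1 is 1.0 */
    (void)m;
  }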
|
||||
@@ -27,14 +27,19 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_ISNANF4_H___
|
||||
#define ___SIMD_MATH_ISNANF4_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector unsigned int
|
||||
isnanf4 (vector float x)
|
||||
static inline vector unsigned int
|
||||
_isnanf4 (vector float x)
|
||||
{
|
||||
(void)x;
|
||||
(void)x;
|
||||
|
||||
// NaN not supported on SPU, result always zero
|
||||
return spu_splats((unsigned int)0x00000000);
|
||||
// NaN not supported on SPU, result always zero
|
||||
return spu_splats((unsigned int)0x00000000);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -27,23 +27,27 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_ISNORMALD2_H___
|
||||
#define ___SIMD_MATH_ISNORMALD2_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector unsigned long long
|
||||
isnormald2 (vector double x)
|
||||
static inline vector unsigned long long
|
||||
_isnormald2 (vector double x)
|
||||
{
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 cmpr;
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
|
||||
vec_ullong2 cmpr;
|
||||
|
||||
//Normal unless nan, infinite, denorm, or zero
|
||||
//Normal unless nan, infinite, denorm, or zero
|
||||
|
||||
//Check for 'not zero or all-ones exponent'
|
||||
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
|
||||
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
|
||||
cmpr = spu_shuffle( cmpr, cmpr, even);
|
||||
//Check for 'not zero or all-ones exponent'
|
||||
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
|
||||
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
|
||||
cmpr = spu_shuffle( cmpr, cmpr, even);
|
||||
|
||||
return cmpr;
|
||||
return cmpr;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -27,12 +27,17 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_ISNORMALF4_H___
|
||||
#define ___SIMD_MATH_ISNORMALF4_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector unsigned int
|
||||
isnormalf4 (vector float x)
|
||||
static inline vector unsigned int
|
||||
_isnormalf4 (vector float x)
|
||||
{
|
||||
// NaN, INF not supported on SPU; normal unless zero
|
||||
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
|
||||
// NaN, INF not supported on SPU; normal unless zero
|
||||
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -27,37 +27,41 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_ISUNORDEREDD2_H___
|
||||
#define ___SIMD_MATH_ISUNORDEREDD2_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector unsigned long long
|
||||
isunorderedd2 (vector double x, vector double y)
|
||||
static inline vector unsigned long long
|
||||
_isunorderedd2 (vector double x, vector double y)
|
||||
{
|
||||
vec_double2 neg;
|
||||
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
|
||||
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
|
||||
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
|
||||
vec_double2 neg;
|
||||
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
|
||||
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
|
||||
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
|
||||
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
|
||||
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
|
||||
|
||||
//Check if x is nan
|
||||
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
|
||||
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
|
||||
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
|
||||
//Check if x is nan
|
||||
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
|
||||
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
|
||||
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
|
||||
|
||||
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_shuffle( cmpgt, cmpgt, odd ) ) );
|
||||
|
||||
//Check if y is nan
|
||||
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
|
||||
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
|
||||
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
|
||||
//Check if y is nan
|
||||
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
|
||||
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
|
||||
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
|
||||
|
||||
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
|
||||
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
|
||||
spu_shuffle( cmpgt, cmpgt, odd ) ) );
|
||||
|
||||
return spu_or( cmpnanx, cmpnany );
|
||||
return spu_or( cmpnanx, cmpnany );
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -27,15 +27,20 @@
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_ISUNORDEREDF4_H___
|
||||
#define ___SIMD_MATH_ISUNORDEREDF4_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
|
||||
vector unsigned int
|
||||
isunorderedf4 (vector float x, vector float y)
|
||||
static inline vector unsigned int
|
||||
_isunorderedf4 (vector float x, vector float y)
|
||||
{
|
||||
(void)x;
|
||||
(void)y;
|
||||
(void)x;
|
||||
(void)y;
|
||||
|
||||
// NaN not supported on SPU, result always zero
|
||||
return spu_splats((unsigned int)0x00000000);
|
||||
// NaN not supported on SPU, result always zero
|
||||
return spu_splats((unsigned int)0x00000000);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -29,17 +29,20 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LDEXPD2_H___
#define ___SIMD_MATH_LDEXPD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector double
ldexpd2(vector double x, vector signed long long ex)
static inline vector double
_ldexpd2(vector double x, vector signed long long ex)
{
  vec_int4 e1, e2;
  vec_int4 min = spu_splats(-2099);
  // vec_int4 min = spu_splats(-2044);
  vec_int4 max = spu_splats( 2098);
  // vec_int4 max = spu_splats( 2046);
  vec_uint4 cmp_min, cmp_max;
  vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
  vec_double2 f1, f2;
@@ -83,7 +86,7 @@ ldexpd2(vector double x, vector signed long long ex)

  /* Compute the product x * 2^e1 * 2^e2
  */
  // out = spu_mul(spu_mul(x, f1), f2);

  // check floating point register DENORM bit
  vec_uint4 fpscr0, fpscr;
@@ -159,7 +162,7 @@ ldexpd2(vector double x, vector signed long long ex)

  maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
  maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
  // maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
  minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
  minmask = spu_shuffle(minmask, minmask, splat_msb);

@@ -245,9 +248,9 @@ ldexpd2(vector double x, vector signed long long ex)
  vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));

  // select right answer
  x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
  x = spu_sel(x, (vec_double2)zeros, minmask);
  x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
  x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask);
  x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask);
  x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask);

  out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);

@@ -260,4 +263,4 @@ ldexpd2(vector double x, vector signed long long ex)
  return out;
}

#endif
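For context on the clamp bounds (-2099, 2098) above, which span the full double range including denormals: a hedged scalar sketch of the two-multiply scaling idea, assuming IEEE-754 doubles and hardware denormal support. The vector code instead shifts the mantissa directly to build denormal results; ldexp_sketch is a hypothetical name:

#include <stdint.h>

static double ldexp_sketch(double x, int e)
{
    /* Split e so each factor is a representable power of two; two
       multiplies let x * 2^e reach ranges a single factor cannot. */
    int e1 = e > 1023 ? 1023 : (e < -1022 ? -1022 : e);
    int e2 = e - e1;
    if (e2 >  1023) e2 =  1023;   /* result overflows regardless */
    if (e2 < -1022) e2 = -1022;   /* result underflows toward zero */
    union { uint64_t u; double d; } f1, f2;
    f1.u = (uint64_t)(uint32_t)(1023 + e1) << 52;   /* 2^e1 */
    f2.u = (uint64_t)(uint32_t)(1023 + e2) << 52;   /* 2^e2 */
    return (x * f1.d) * f2.d;
}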
@@ -27,26 +27,30 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LDEXPF4_H___
#define ___SIMD_MATH_LDEXPF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector float
ldexpf4 (vector float x, vector signed int exp)

static inline vector float
_ldexpf4 (vector float x, vector signed int exp)
{
  vec_int4 zeros = spu_splats(0);

  vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000);
  vec_uint4 expmask = spu_splats(0x7F800000U);
  vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
  vec_int4 e2 = spu_rlmask(e1,-23);

  vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255);
  vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp);
  minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros));
  vec_uint4 maxmask = spu_cmpgt(exp, 255);
  vec_uint4 minmask = spu_cmpgt(spu_splats(-255), exp);
  minmask = spu_or (minmask, spu_cmpeq(x, (vec_float4)zeros));

  vec_int4 esum = spu_add(e2, exp);

  maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255));
  maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF));
  minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
  maxmask = spu_or (maxmask, spu_cmpgt(esum, 255));
  maxmask = spu_and(maxmask, spu_splats(0x7FFFFFFFU));
  minmask = spu_or (minmask, spu_cmpgt(zeros, esum));

  x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
  x = spu_sel(x, (vec_float4)zeros, minmask);
@@ -54,3 +58,5 @@ ldexpf4 (vector float x, vector signed int exp)
  x = spu_sel(x, (vec_float4)maxmask, maxmask);
  return x;
}

#endif
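The single-precision path can add the scale factor straight into the exponent field because SPU floats have no denormals. A scalar sketch under standard IEEE layout; note the vector code saturates overflow to 0x7FFFFFFF, the largest magnitude in the SPU extended-range float format, where this sketch uses FLT_MAX's bit pattern instead. ldexpf_sketch is a hypothetical name:

#include <stdint.h>

static float ldexpf_sketch(float x, int e)
{
    union { uint32_t u; float f; } v = { .f = x };
    int sum = (int)((v.u >> 23) & 0xFF) + e;   /* biased exponent + e */
    if (v.f == 0.0f || sum <= 0) return 0.0f;  /* underflow flushes to zero */
    if (sum > 254) {                           /* overflow: saturate */
        v.u = (v.u & 0x80000000u) | 0x7F7FFFFFu;
        return v.f;
    }
    v.u = (v.u & ~(0xFFu << 23)) | ((uint32_t)sum << 23);
    return v.f;
}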
@@ -27,11 +27,14 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLABSI2_H___
#define ___SIMD_MATH_LLABSI2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

vector signed long long
llabsi2 (vector signed long long in)
static inline vector signed long long
_llabsi2 (vector signed long long in)
{
  vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
  sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
@@ -43,3 +46,5 @@ llabsi2 (vector signed long long in)

  return ((vec_llong2)(res));
}

#endif
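The shuffle above splats each doubleword's sign across the full slot; in scalar form this is the classic branch-free absolute value built from the xor/subtract identity, assuming two's complement. llabs_sketch is a hypothetical name:

#include <stdint.h>

static int64_t llabs_sketch(int64_t x)
{
    int64_t m = x >> 63;        /* arithmetic shift: 0 or all ones */
    return (x ^ m) - m;         /* identity for m = 0, negation for m = -1 */
}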
85
Extras/simdmathlibrary/spu/simdmath/lldivi2.h
Normal file
@@ -0,0 +1,85 @@
/* lldivi2 -
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
   * Neither the name of the Sony Computer Entertainment Inc nor the names
     of its contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLDIVI2_H___
#define ___SIMD_MATH_LLDIVI2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/_lldiv.h>
#include <simdmath/lldivu2.h>

static inline vector signed long long
__lldivi2_negatell2 (vector signed long long x)
{
  vector signed int zero = (vector signed int){0,0,0,0};
  vector signed int borrow;

  borrow = spu_genb(zero, (vec_int4)x);
  borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
  return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}

// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.

static inline lldivi2_t
_lldivi2 (vector signed long long numer, vector signed long long denom)
{
  lldivi2_t res;
  lldivu2_t resAbs;
  vec_ullong2 numerAbs, denomAbs;
  vec_uint4 numerPos, denomPos, quotNeg;

  // Determine whether result needs sign change

  numerPos = spu_cmpgt((vec_int4)numer, -1);
  numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
  denomPos = spu_cmpgt((vec_int4)denom, -1);
  denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
  quotNeg = spu_xor( numerPos, denomPos );

  // Use absolute values of numerator, denominator

  numerAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(numer), numer, (vec_ullong2)numerPos);
  denomAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(denom), denom, (vec_ullong2)denomPos);

  // Divide the absolute values, then restore the signs.

  resAbs = _lldivu2(numerAbs, denomAbs);
  res.quot = spu_sel((vec_llong2)resAbs.quot, __lldivi2_negatell2((vec_llong2)resAbs.quot),
                     (vec_ullong2)quotNeg);
  res.rem = spu_sel(__lldivi2_negatell2((vec_llong2)resAbs.rem), (vec_llong2)resAbs.rem,
                    (vec_ullong2)numerPos);

  return res;
}

#endif
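A scalar sketch of the sign-fixing strategy above, assuming C99 truncated-division semantics: divide magnitudes unsigned, negate the quotient when the signs differ, and give the remainder the numerator's sign. lldiv_sketch_t and lldivi_sketch are hypothetical names:

#include <stdint.h>

typedef struct { int64_t quot, rem; } lldiv_sketch_t;

static lldiv_sketch_t lldivi_sketch(int64_t n, int64_t d)
{
    uint64_t na = n < 0 ? 0u - (uint64_t)n : (uint64_t)n;
    uint64_t da = d < 0 ? 0u - (uint64_t)d : (uint64_t)d;
    uint64_t q  = da ? na / da : 0;    /* divide-by-zero -> quot 0 */
    uint64_t r  = da ? na % da : na;   /* ...and remainder = numerator */
    lldiv_sketch_t out;
    out.quot = ((n < 0) != (d < 0)) ? -(int64_t)q : (int64_t)q;
    out.rem  = (n < 0) ? -(int64_t)r : (int64_t)r;
    return out;
}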
@@ -27,46 +27,51 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLDIVU2_H___
#define ___SIMD_MATH_LLDIVU2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"

#include <simdmath/_lldiv.h>

// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.

lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
static inline lldivu2_t
_lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
  lldivu2_t res;
  vec_uint4 denomZeros, numerZeros;
  vec_int4 shift;
  vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
  vec_ullong2 quot, newQuot;
  vec_ullong2 newNum, skip, cont;
  int anyCont;

  // Get difference of leading zeros.

  denomZeros = (vec_uint4)ll_spu_cntlz( denom );
  numerZeros = (vec_uint4)ll_spu_cntlz( numer );
  denomZeros = (vec_uint4)__ll_spu_cntlz( denom );
  numerZeros = (vec_uint4)__ll_spu_cntlz( numer );

  shift = (vec_int4)spu_sub( denomZeros, numerZeros );

  // Shift denom to align leading one with numerator's

  denomShifted = ll_spu_sl( denom, (vec_ullong2)shift );
  oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
  oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) );
  denomShifted = __ll_spu_sl( denom, (vec_ullong2)shift );
  oneShifted = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
  oneShifted = spu_sel( oneShifted, spu_splats(0ull), __ll_spu_cmpeq_zero( denom ) );

  // Shift left all leading zeros.

  denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros );
  oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
  denomLeft = __ll_spu_sl( denom, (vec_ullong2)denomZeros );
  oneLeft = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );

  quot = spu_splats(0ull);

  do
  {
    cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
    cont = __ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
    anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );

    newQuot = spu_or( quot, oneShifted );
@@ -74,25 +79,26 @@ lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long de
    // Subtract shifted denominator from remaining numerator
    // when denominator is not greater.

    skip = ll_spu_cmpgt( denomShifted, numer );
    newNum = ll_spu_sub( numer, denomShifted );
    skip = __ll_spu_cmpgt( denomShifted, numer );
    newNum = __ll_spu_sub( numer, denomShifted );

    // If denominator is greater, next shift is one more, otherwise
    // next shift is number of leading zeros of remaining numerator.

    numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
    numerZeros = (vec_uint4)spu_sel( __ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
    shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );

    oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
    denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
    oneShifted = __ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
    denomShifted = __ll_spu_rlmask( denomLeft, (vec_ullong2)shift );

    quot = spu_sel( newQuot, quot, skip );
    numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
  }
  while ( anyCont );

  res.quot = quot;
  res.rem = numer;
  return res;
}

#endif
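A scalar sketch of the shift-subtract loop above: align the divisor's leading one with the numerator's, subtract wherever it fits (setting the matching quotient bit), and walk the divisor right until the "one" marker shifts out. The vector twist is that the loop runs until every slot is done (anyCont). This sketch assumes GCC's __builtin_clzll for the leading-zero count; udiv_sketch is a hypothetical name:

#include <stdint.h>

static uint64_t udiv_sketch(uint64_t numer, uint64_t denom, uint64_t *rem)
{
    uint64_t quot = 0;
    if (denom) {
        int shift = __builtin_clzll(denom) -
                    (numer ? __builtin_clzll(numer) : 64);
        if (shift >= 0) {
            uint64_t d   = denom << shift;   /* aligned divisor */
            uint64_t bit = 1ull << shift;    /* quotient bit marker */
            while (bit) {
                if (numer >= d) { numer -= d; quot |= bit; }
                d >>= 1;
                bit >>= 1;
            }
        }
    }
    *rem = numer;                            /* div-by-zero: quot 0, rem = numer */
    return quot;
}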
@@ -28,6 +28,9 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLRINTD2_H___
#define ___SIMD_MATH_LLRINTD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

@@ -35,8 +38,8 @@
// Handles no exception
// overflow will return unspecified data

vector signed long long
llrintd2 (vector double in)
static inline vector signed long long
_llrintd2 (vector double in)
{
  int shift0, shift1;
  vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
@@ -67,8 +70,8 @@ llrintd2 (vector double in)
  bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
  bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));

  // bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
  //               (vec_ullong2)spu_splats(0x8000000000000000ULL));
  mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));

  /* Determine how many bits to shift the mantissa to correctly
@@ -102,9 +105,11 @@ llrintd2 (vector double in)
  mant = spu_xor(mant, sign);
  borrow = spu_genb(mant, sign);
  borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
        4,5,6,7, 192,192,192,192,
        12,13,14,15, 192,192,192,192}));
  mant = spu_subx(mant, sign, borrow);

  return ((vec_llong2)(mant));
}

#endif
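The 0x4330000000000000 constant above is 2^52, the classic magic-bias rounding trick: adding it to a double with |x| < 2^52 forces the hardware to round away the fraction bits, and subtracting it back leaves the rounded integer. A scalar sketch assuming round-to-nearest mode; llrint_sketch is a hypothetical name:

#include <stdint.h>
#include <string.h>

static int64_t llrint_sketch(double x)
{
    uint64_t b = 0x4330000000000000ull;   /* 2^52 */
    uint64_t xb;
    memcpy(&xb, &x, sizeof xb);
    b |= xb & 0x8000000000000000ull;      /* copy x's sign onto the bias */
    double bias;
    memcpy(&bias, &b, sizeof bias);
    double r = (x + bias) - bias;         /* rounds in the current FP mode */
    return (int64_t)r;                    /* exact: r is already integral */
}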
@@ -28,6 +28,9 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLRINTF4_H___
#define ___SIMD_MATH_LLRINTF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

@@ -35,8 +38,8 @@
// Handles no exception
// overflow will return unspecified data

llroundf4_t
llrintf4 (vector float in)
static inline llroundf4_t
_llrintf4 (vector float in)
{
  llroundf4_t res;
  vec_int4 exp;
@@ -100,3 +103,5 @@ llrintf4 (vector float in)

  return res;
}

#endif
@@ -28,6 +28,9 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLROUNDD2_H___
#define ___SIMD_MATH_LLROUNDD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

@@ -35,8 +38,8 @@
// Handles no exception
// overflow will return unspecified data

vector signed long long
llroundd2 (vector double in)
static inline vector signed long long
_llroundd2 (vector double in)
{
  int shift0, shift1;
  vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
@@ -72,7 +75,7 @@ llroundd2 (vector double in)
  */
  addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
  addend = spu_rlmask(addend, -7);
  // addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
  mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));

  /* Compute the two's complement of the mantissa if the
@@ -84,9 +87,11 @@ llroundd2 (vector double in)
  mant = spu_xor(mant, sign);
  borrow = spu_genb(mant, sign);
  borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
        4,5,6,7, 192,192,192,192,
        12,13,14,15, 192,192,192,192}));
  mant = spu_subx(mant, sign, borrow);

  return ((vec_llong2)(mant));
}

#endif
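The addend extracted from the mantissa above implements llround's rounding rule on the magnitude: shift out the fraction, then add back the last bit shifted out, which rounds halfway cases away from zero. A scalar sketch of that step; round_magnitude_sketch is a hypothetical name:

#include <stdint.h>

static uint64_t round_magnitude_sketch(uint64_t mant, int frac_bits)
{
    if (frac_bits <= 0) return mant;                /* already integral */
    uint64_t half = (mant >> (frac_bits - 1)) & 1;  /* bit just below the point */
    return (mant >> frac_bits) + half;              /* round half away from zero */
}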
@@ -28,6 +28,9 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LLROUNDF4_H___
#define ___SIMD_MATH_LLROUNDF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

@@ -35,8 +38,8 @@
// Handles no exception
// overflow will return unspecified data

llroundf4_t
llroundf4 (vector float in)
static inline llroundf4_t
_llroundf4 (vector float in)
{
  llroundf4_t res;
  vec_int4 exp;
@@ -90,8 +93,8 @@ llroundf4 (vector float in)
  addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
  addend0 = spu_rlmask(addend0, -7);
  addend1 = spu_rlmask(addend1, -7);
  // addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
  // addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
  res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
  res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));

@@ -113,3 +116,5 @@ llroundf4 (vector float in)

  return res;
}

#endif
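The spu_genc/spu_addx pair above synthesizes a 64-bit add from 32-bit lanes: genc produces the carry out of each lane and addx feeds it into the next-higher lane after the rlqwbyte realignment. The same decomposition in plain C, as a sketch (add64_from_32 is a hypothetical name):

#include <stdint.h>

static uint64_t add64_from_32(uint32_t ahi, uint32_t alo,
                              uint32_t bhi, uint32_t blo)
{
    uint32_t lo    = alo + blo;
    uint32_t carry = lo < alo;            /* carry out of the low lane */
    uint32_t hi    = ahi + bhi + carry;   /* addx: add with carry in */
    return ((uint64_t)hi << 32) | lo;
}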
@@ -27,53 +27,57 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LOG10F4_H___
#define ___SIMD_MATH_LOG10F4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#include <simdmath/divf4.h>

#define _LOG10F_H_loga2msb ((float)0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float)0.4342944622040f)
#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f)
#define _LOG10F_H_logae ((float)0.4342944819033f)
#define __LOG10F_loga2msb 0.3010299205780f
#define __LOG10F_loga2lsb 7.5085978266e-8f
#define __LOG10F_logaemsb 0.4342944622040f
#define __LOG10F_logaelsb 1.9699272335e-8f
#define __LOG10F_logae 0.4342944819033f

#define _LOG10F_H_c0 ((float)(0.2988439998f))
#define _LOG10F_H_c1 ((float)(0.3997655209f))
#define _LOG10F_H_c2 ((float)(0.6666679125f))
#define __LOG10F_c0 0.2988439998f
#define __LOG10F_c1 0.3997655209f
#define __LOG10F_c2 0.6666679125f

vector float
log10f4 (vector float x)
static inline vector float
_log10f4 (vector float x)
{
  vec_int4 zeros = spu_splats((int)0);
  vec_float4 ones = spu_splats(1.0f);
  vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
  vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);

  vec_int4 expmask = spu_splats((int)0x7F800000);
  vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
  x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
  vec_uint4 expmask = spu_splats(0x7F800000U);
  vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 );
  x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);

  vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
  vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
  x = spu_sel(x , spu_add(x, x) , mask);
  xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);

  vec_float4 x1 = spu_sub(x , ones);
  vec_float4 z = divf4 (x1, spu_add(x, ones));
  vec_float4 z = _divf4 (x1, spu_add(x, ones));
  vec_float4 w = spu_mul(z , z);
  vec_float4 polyw;
  polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1));
  polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2));
  polyw = spu_madd(spu_splats(__LOG10F_c0), w, spu_splats(__LOG10F_c1));
  polyw = spu_madd(polyw , w, spu_splats(__LOG10F_c2));

  vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
  vec_float4 wnew = spu_convtf(xexp,0);

  vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1,
                            spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew));
  vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1,
                            spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew,
                                     spu_mul(spu_splats(_LOG10F_H_logae), yneg))
  vec_float4 zz1 = spu_madd(spu_splats(__LOG10F_logaemsb), x1,
                            spu_mul(spu_splats(__LOG10F_loga2msb),wnew));
  vec_float4 zz2 = spu_madd(spu_splats(__LOG10F_logaelsb), x1,
                            spu_madd(spu_splats(__LOG10F_loga2lsb), wnew,
                                     spu_mul(spu_splats(__LOG10F_logae), yneg))
                           );

  return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}

#endif
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ___SIMD_MATH_LOG1PF4_H___
|
||||
#define ___SIMD_MATH_LOG1PF4_H___
|
||||
|
||||
#include <simdmath.h>
|
||||
#include <spu_intrinsics.h>
|
||||
vector float
|
||||
log1pf4 (vector float x)
|
||||
|
||||
#include <simdmath/logf4.h>
|
||||
#include <simdmath/divf4.h>
|
||||
|
||||
static inline vector float
|
||||
_log1pf4 (vector float x)
|
||||
{
|
||||
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
|
||||
spu_cmpgt(spu_splats(0.5f), x));
|
||||
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
|
||||
spu_cmpgt(spu_splats(0.5f), x));
|
||||
vec_float4 x2 = spu_mul(x,x);
|
||||
vec_float4 d0, d1, n0, n1;
|
||||
|
||||
d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f));
|
||||
d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f));
|
||||
d0 = spu_madd(x , spu_splats(1.5934420741f), spu_splats(0.8952856868f));
|
||||
d1 = spu_madd(x , spu_splats(0.1198195734f), spu_splats(0.8377145063f));
|
||||
d1 = spu_madd(x2, d1, d0);
|
||||
|
||||
n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f));
|
||||
n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f));
|
||||
n0 = spu_madd(x , spu_splats(1.1457993413f), spu_splats(0.8952856678f));
|
||||
n1 = spu_madd(x , spu_splats(0.0082862580f), spu_splats(0.3394238808f));
|
||||
n1 = spu_madd(x2, n1, n0);
|
||||
|
||||
return spu_sel(logf4(spu_add(x, spu_splats(1.0f))),
|
||||
spu_mul(x, divf4(n1, d1)),
|
||||
return spu_sel(_logf4(spu_add(x, spu_splats(1.0f))),
|
||||
spu_mul(x, _divf4(n1, d1)),
|
||||
nearzeromask);
|
||||
}
|
||||
|
||||
#endif
|
||||
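The split above exists because log(1+x) loses precision when 1+x rounds away small x; for |x| < 0.5 the code instead evaluates x * N(x)/D(x), a rational fit that stays accurate through zero, and falls back to the plain log path elsewhere. A scalar sketch of the near-zero branch, reusing the same coefficients; log1p_sketch_near_zero is a hypothetical name:

static float log1p_sketch_near_zero(float x)
{
    float x2 = x * x;
    float d  = (0.1198195734f * x + 0.8377145063f) * x2
             + (1.5934420741f * x + 0.8952856868f);
    float n  = (0.0082862580f * x + 0.3394238808f) * x2
             + (1.1457993413f * x + 0.8952856678f);
    return x * (n / d);                        /* valid for |x| < 0.5 */
}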
@@ -27,45 +27,52 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LOG2F4_H___
#define ___SIMD_MATH_LOG2F4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>

#define _LOG2F_H_l2emsb ((float)1.4426950216293f)
#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f)
#define _LOG2F_H_l2e ((float)1.4426950408890f)

#define _LOG2F_H_c0 ((float)(0.2988439998f))
#define _LOG2F_H_c1 ((float)(0.3997655209f))
#define _LOG2F_H_c2 ((float)(0.6666679125f))

vector float
log2f4 (vector float x)

#include <simdmath/divf4.h>

#define __LOG2F_l2emsb 1.4426950216293f
#define __LOG2F_l2elsb 1.9259629911e-8f
#define __LOG2F_l2e 1.4426950408890f

#define __LOG2F_c0 0.2988439998f
#define __LOG2F_c1 0.3997655209f
#define __LOG2F_c2 0.6666679125f

static inline vector float
_log2f4 (vector float x)
{
  vec_int4 zeros = spu_splats((int)0);
  vec_float4 ones = spu_splats(1.0f);
  vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
  vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);

  vec_int4 expmask = spu_splats((int)0x7F800000);
  vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
  x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
  x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uint4)expmask);

  vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
  vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
  x = spu_sel(x , spu_add(x, x) , mask);
  xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);

  vec_float4 x1 = spu_sub(x , ones);
  vec_float4 z = divf4(x1, spu_add(x, ones));
  vec_float4 z = _divf4(x1, spu_add(x, ones));
  vec_float4 w = spu_mul(z , z);
  vec_float4 polyw;
  polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1));
  polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2));
  polyw = spu_madd(spu_splats(__LOG2F_c0), w, spu_splats(__LOG2F_c1));
  polyw = spu_madd(polyw , w, spu_splats(__LOG2F_c2));

  vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
  vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0));
  vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1,
                            spu_mul(spu_splats(_LOG2F_H_l2e), yneg)
  vec_float4 zz1 = spu_madd(spu_splats(__LOG2F_l2emsb), x1, spu_convtf(xexp,0));
  vec_float4 zz2 = spu_madd(spu_splats(__LOG2F_l2elsb), x1,
                            spu_mul(spu_splats(__LOG2F_l2e), yneg)
                           );

  return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}

#endif
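The l2emsb/l2elsb pair above stores log2(e) as a rounded head plus a tiny correction tail, so the sum head*x1 + (tail*x1 + smaller_terms) keeps bits a single float multiply would round off. A scalar sketch of that split-constant step; split_mul_sketch is a hypothetical name:

static float split_mul_sketch(float x1, float rest)
{
    const float head = 1.4426950216293f;    /* log2(e), truncated */
    const float tail = 1.9259629911e-8f;    /* log2(e) - head */
    return head * x1 + (tail * x1 + rest);  /* sum the small terms first */
}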
86
Extras/simdmathlibrary/spu/simdmath/logbd2.h
Normal file
@@ -0,0 +1,86 @@
/* logbd2 - for each element of vector x, return the exponent of the normalized value of x as a floating-point value
   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
   All rights reserved.

   Redistribution and use in source and binary forms,
   with or without modification, are permitted provided that the
   following conditions are met:
   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
   * Neither the name of the Sony Computer Entertainment Inc nor the names
     of its contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LOGBD2_H___
#define ___SIMD_MATH_LOGBD2_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>

static inline vector double
_logbd2 (vector double x)
{
  vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
  vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
  vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };

  vec_ullong2 sign = spu_splats(0x8000000000000000ull);
  vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
  vec_ullong2 zero = spu_splats(0x0000000000000000ull);

  vec_ullong2 isnan, isinf, iszero;
  vec_double2 logb = (vec_double2)zero;
  vec_llong2 e1, e2;
  vec_uint4 cmpgt, cmpeq, cmpzr;
  vec_int4 lz, lz0, lz1;

  // NaN: x is NaN (all-ones exponent and non-zero mantissa)
  cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
  cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
  isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
                               spu_and( spu_shuffle( cmpeq, cmpeq, even ),
                                        spu_shuffle( cmpgt, cmpgt, odd ) ) );
  logb = spu_sel( logb, (vec_double2)spu_splats(0x7FF8000000000000ll), isnan );

  // +HUGE_VAL: x is infinite (all-ones exponent and zero mantissa)
  isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
  logb = spu_sel( logb, (vec_double2)spu_splats(__builtin_huge_val()), isinf );

  // -HUGE_VAL: x is zero (zero exponent and zero mantissa)
  cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
  iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
  logb = spu_sel( logb, (vec_double2)spu_splats(-__builtin_huge_val()), iszero );

  // Integer exponent: if x is normal or subnormal, return the unbiased exponent of the normalized value of x
  e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
  e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);

  lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
  lz0 = (vec_int4)spu_shuffle( lz, lz, even );
  lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
  lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );

  logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
                  spu_nor( isnan, spu_or( isinf, iszero ) ) );

  return logb;
}

#endif
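A scalar sketch of the classification above, assuming IEEE-754 doubles: NaN propagates, infinity of either sign maps to +HUGE_VAL, zero maps to -HUGE_VAL, and everything else returns the unbiased exponent, with subnormals normalized first via a leading-zero count (the lz0/lz1 arithmetic in the vector code). Uses GCC's __builtin_clzll; logb_sketch is a hypothetical name:

#include <stdint.h>
#include <string.h>
#include <math.h>

static double logb_sketch(double x)
{
    uint64_t b;
    memcpy(&b, &x, sizeof b);
    uint64_t mag = b & 0x7fffffffffffffffull;
    int e = (int)(mag >> 52);                  /* biased exponent */
    if (e == 0x7ff)
        return mag > 0x7ff0000000000000ull ? x + x     /* NaN propagates */
                                           : HUGE_VAL; /* +-inf -> +inf */
    if (mag == 0) return -HUGE_VAL;            /* +-0 */
    if (e == 0) {                              /* subnormal: normalize */
        int lz = __builtin_clzll(mag) - 11;    /* zeros inside the mantissa field */
        return (double)(-1022 - lz);
    }
    return (double)(e - 1023);
}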
@@ -27,20 +27,20 @@
 POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ___SIMD_MATH_LOGBF4_H___
#define ___SIMD_MATH_LOGBF4_H___

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>

#ifndef HUGE_VALF
#define HUGE_VALF __builtin_huge_valf ()
#endif

vector float
logbf4 (vector float x)
static inline vector float
_logbf4 (vector float x)
{
  vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
  vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
  vec_uint4 zeromask = spu_cmpeq(e1, 0);
  e1 = spu_sub(e1, spu_splats((int)0x3F800000));
  return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}

#endif
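The single-precision version works entirely in place: subtracting the bias pattern 0x3F800000 inside the exponent field and converting with a 23-bit fixed-point scale (spu_convtf's second argument) yields the unbiased exponent as a float in one step. For example x = 8.0f has exponent field 130, and 130 - 127 = 3.0f. A scalar sketch, with logbf_sketch as a hypothetical name:

#include <stdint.h>
#include <string.h>

static float logbf_sketch(float x)
{
    uint32_t b;
    memcpy(&b, &x, sizeof b);
    int32_t e = (int32_t)(b & 0x7F800000);     /* isolated exponent field */
    if (e == 0) return -__builtin_huge_valf(); /* zero (or denormal) input */
    e -= 0x3F800000;                           /* remove the bias in place */
    return (float)e / (float)(1 << 23);        /* same as spu_convtf(e, 23) */
}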
Some files were not shown because too many files have changed in this diff