Added SCE SIMD math library in Extras/simdmathlibrary

The upcoming vectormath library that will be used to speed up the SPU version of Extras/BulletMultiThreaded depends on this.
ejcoumans
2007-07-23 04:58:24 +00:00
parent 685138d033
commit 7529cdb3f6
287 changed files with 32064 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
# make file to build the libsimdmath library for SPU
# Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms,
# with or without modification, are permitted provided that the
# following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Sony Computer Entertainment Inc nor the names
# of its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# To add a file, all you need to do is edit OBJS; the rest will just work
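# Example (illustrative): a hypothetical newfuncf4.c would be picked up by
# appending newfuncf4.o to the OBJS list below; the %.o: %.c rule handles the rest.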
prefix = /usr
DESTDIR =
OBJS = fabsd2.o fabsf4.o truncf4.o divf4.o tanf4.o isnanf4.o isnand2.o isinff4.o isinfd2.o \
is0denormf4.o is0denormd2.o recipd2.o divd2.o tand2.o sqrtf4.o absi4.o sqrtd2.o \
sinf4.o isgreaterd2.o sind2.o sincosf4.o rsqrtf4.o signbitf4.o signbitd2.o \
rsqrtd2.o copysignf4.o remainderf4.o recipf4.o copysignd2.o log2f4.o \
negatef4.o negated2.o modff4.o asinf4.o frexpf4.o frexpd2.o ldexpf4.o cbrtf4.o \
cosd2.o cosf4.o hypotf4.o hypotd2.o ceilf4.o fmaf4.o fmaxf4.o fminf4.o floorf4.o \
fdimf4.o fmodf4.o negatei4.o logf4.o log1pf4.o log10f4.o expm1f4.o \
expf4.o divi4.o exp2f4.o powf4.o atanf4.o atan2f4.o acosf4.o ilogbf4.o ilogbd2.o \
logbf4.o logbd2.o llroundd2.o llroundf4.o llrintf4.o isequalf4.o isequald2.o \
islessgreaterf4.o islessgreaterd2.o islessf4.o islessd2.o isgreaterf4.o \
isgreaterd2.o islessequalf4.o islessequald2.o isgreaterequalf4.o isgreaterequald2.o \
isfinitef4.o isfinited2.o isnormalf4.o isnormald2.o isunorderedf4.o isunorderedd2.o \
llrintd2.o roundf4.o rintf4.o irintf4.o iroundf4.o fmad2.o fmaxd2.o fmind2.o fdimd2.o \
nextafterd2.o fpclassifyf4.o fpclassifyd2.o nearbyintd2.o nextafterf4.o nearbyintf4.o \
llabsi2.o truncd2.o roundd2.o rintd2.o negatell2.o divu4.o modfd2.o lldivu2.o \
ceild2.o floord2.o ldexpd2.o scalbnf4.o scalbllnd2.o lldivi2.o remquof4.o remquod2.o \
fmodd2.o remainderd2.o
INCLUDES_SPU = -I../
CROSS_SPU = spu-
AR_SPU = $(CROSS_SPU)ar
CC_SPU = $(CROSS_SPU)gcc
CXX_SPU = $(CROSS_SPU)g++
RANLIB_SPU = $(CROSS_SPU)ranlib
TEST_CMD_SPU =
CFLAGS_SPU=$(INCLUDES_SPU) -O2 -W -Wall
INSTALL = install
MAKE_DEFS = \
prefix='$(prefix)' \
DESTDIR='$(DESTDIR)' \
LIB_BASE='$(LIB_BASE)' \
LIB_NAME='$(LIB_NAME)' \
STATIC_LIB='$(STATIC_LIB)' \
CROSS_SPU='$(CROSS_SPU)' \
AR_SPU='$(AR_SPU)' \
CC_SPU='$(CC_SPU)' \
CXX_SPU='$(CXX_SPU)' \
RANLIB_SPU='$(RANLIB_SPU)' \
TEST_CMD_SPU='$(TEST_CMD_SPU)' \
INSTALL='$(INSTALL)'
LIB_BASE = simdmath
LIB_NAME = lib$(LIB_BASE)
STATIC_LIB = $(LIB_NAME).a
all: $(STATIC_LIB)

$(STATIC_LIB): $(OBJS)
	$(AR_SPU) cr $@ $(OBJS)
	$(RANLIB_SPU) $@

install: $(STATIC_LIB)
	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/include
	$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/spu/include/
	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/spu/lib
	$(INSTALL) $(STATIC_LIB) $(DESTDIR)$(prefix)/spu/lib/$(STATIC_LIB)

clean:
	cd tests; $(MAKE) $(MAKE_DEFS) clean
	rm -f $(OBJS)
	rm -f $(STATIC_LIB)

$(OBJS): ../simdmath.h

check: $(STATIC_LIB)
	cd tests; $(MAKE) $(MAKE_DEFS); $(MAKE) $(MAKE_DEFS) check

# Some objects have special header files.
sinf4.o sind2.o sincosf4.o cosd2.o: sincos_c.h
lldivu2.o lldivi2.o: lldiv.h

%.o: %.c
	$(CC_SPU) $(CFLAGS_SPU) -c $<

#----------
# C++
#----------
%.o: %.C
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cpp
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cc
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

%.o: %.cxx
	$(CXX_SPU) $(CFLAGS_SPU) -c $<

View File

@@ -0,0 +1,40 @@
/* absi4 - for each of four integer slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}
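For reference, the same select-based conditional negate as a scalar sketch (illustrative only, not part of the commit):

static inline int abs_sketch(int x)
{
    int neg = 0 - x;             /* mirrors spu_sub( 0, x ) */
    return (x > -1) ? x : neg;   /* mirrors spu_sel with the x > -1 mask */
}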

View File

@@ -0,0 +1,78 @@
/* acosf4 - Computes the inverse cosine of all four float slots of x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Computes the inverse cosine of all four slots of x
//
vector float
acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
t1 = sqrtf4(spu_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
/* Instruction counts can be reduced if the polynomial is
 * computed entirely from nested (dependent) fma's. However,
 * to reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
 */
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
result = spu_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
 */
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
result = spu_sel(pos, neg, select);
return result;
}
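A scalar transliteration of the same scheme may help when reading the intrinsics (an illustrative sketch; assumes the standard C math library):

#include <math.h>

static float acosf_sketch(float x)
{
    float xa = fabsf(x);
    float t1 = sqrtf(1.0f - xa);
    float x2 = xa * xa, x4 = x2 * x2;
    /* hi and lo halves of the degree-7 polynomial, as in acosf4 */
    float hi = ((-0.0012624911f * xa + 0.0066700901f) * xa
                - 0.0170881256f) * xa + 0.0308918810f;
    float lo = ((-0.0501743046f * xa + 0.0889789874f) * xa
                - 0.2145988016f) * xa + 1.5707963050f;
    float p = hi * x4 + lo;
    /* reflect about pi for negative inputs */
    return (x < 0.0f) ? 3.1415926535898f - t1 * p : t1 * p;
}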

View File

@@ -0,0 +1,85 @@
/* asinf4 - Computes the inverse sine of all four slots of x
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uchar16 positive = (vec_uchar16)spu_cmpgt(x,spu_splats(0.0f));
// gtHalf = (|x| > 0.5)
//
vec_uchar16 gtHalf = (vec_uchar16)spu_cmpabsgt(x,spu_splats(0.5f));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),sqrtf4(g)),gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// x = x + num / denom
//
x = spu_add(x,divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
return x;
}
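The inline pseudocode above corresponds to this scalar sketch (illustrative; standard C math library assumed):

#include <math.h>

static float asinf_sketch(float x)
{
    int positive = (x > 0.0f);
    float xa = fabsf(x);
    int gtHalf = (xa > 0.5f);
    float g, xr;
    if (gtHalf) {     /* reduce via asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)) */
        g  = 0.5f - 0.5f * xa;
        xr = -2.0f * sqrtf(g);
    } else {
        g  = xa * xa;
        xr = xa;
    }
    float denom = (g - 5.54846723f) * g + 5.603603363f;
    float num   = xr * g * (-0.504400557f * g + 0.933933258f);
    xr += num / denom;
    if (gtHalf) xr += 1.57079632679489661923f;
    return positive ? xr : -xr;
}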

View File

@@ -0,0 +1,60 @@
/* atan2f4 - Computes the inverse tangent of y/x for each of four float slots, using the signs of both to determine the quadrant.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
{
vec_float4 res = atanf4(divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uchar16 yNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),y);
vec_uchar16 xNeg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f),x);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 newRes = spu_add(bias, res);
res = spu_sel(res,newRes,xNeg);
return res;
}
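The quadrant fix-up reduces to a few scalar lines (illustrative sketch; atanf stands in for atanf4):

#include <math.h>

static float atan2f_sketch(float y, float x)
{
    float res = atanf(y / x);
    if (x < 0.0f)                      /* wrong quadrant: shift by +/- pi */
        res += (y < 0.0f) ? -3.14159265358979323846f
                          :  3.14159265358979323846f;
    return res;
}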

View File

@@ -0,0 +1,76 @@
/* atanf4 - Computes the inverse tangent of all four float slots of x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Computes the inverse tangent of all four slots of x.
//
vector float
atanf4 (vector float x)
{
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
x = spu_sel(x, inv_x, select);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
return result;
}
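A scalar sketch of the same range reduction and split polynomial (illustrative; 1.0f/x stands in for the recipf4 estimate):

#include <math.h>

static float atanf_sketch(float x)
{
    float bias = 0.0f;
    if (fabsf(x) > 1.0f) {    /* atan(x) = +/- pi/2 + atan(-1/x) for |x| > 1 */
        bias = (x < 0.0f) ? -1.57079632679489661923f
                          :  1.57079632679489661923f;
        x = -1.0f / x;
    }
    float x2 = x * x, x3 = x2 * x, x9 = x3 * x3 * x3;
    float hi = (((0.0028662257f * x2 - 0.0161657367f) * x2
                 + 0.0429096138f) * x2 - 0.0752896400f) * x2 + 0.1065626393f;
    float lo = ((-0.1420889944f * x2 + 0.1999355085f) * x2
                - 0.3333314528f) * x3 + (bias + x);
    return hi * x9 + lo;
}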

View File

@@ -0,0 +1,105 @@
/* cbrtf4 - for each of four float slots, compute the cube root.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define __calcQuot(xexp) n = xexp; \
vec_uchar16 negxexpmask = (vec_uchar16)spu_cmpgt(spu_splats(0), n); \
n = spu_sel(n, spu_add(n,2), negxexpmask); \
\
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4)); \
quot = spu_add(quot, spu_rlmaska(quot, -4)); \
quot = spu_add(quot, spu_rlmaska(quot, -8)); \
quot = spu_add(quot, spu_rlmaska(quot,-16)); \
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1)); \
quot = spu_add( \
quot, \
spu_rlmaska( \
spu_add( \
spu_add(r,5), \
spu_sl (r,2) \
), \
-4 \
) \
); \
#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
vector float
cbrtf4 (vector float x)
{
vec_float4 zeros = spu_splats(0.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, zeros);
vec_int4 xexp, n;
vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
x = spu_and(x, sgnmask);
x = frexpf4(x, &xexp);
vec_float4 p = spu_madd(
spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
x,
spu_splats(0.492659620528969547f)
);
vec_float4 p3 = spu_mul(p, spu_mul(p, p));
vec_int4 quot;
__calcQuot(xexp);
vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
vec_float4 factor = spu_splats((float)(1.0/_CBRTF_H_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/_CBRTF_H_cbrt2)), spu_cmpeq(modval,-1));
factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
factor = spu_sel(factor, spu_splats((float)( _CBRTF_H_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(_CBRTF_H_sqr_cbrt2)), spu_cmpeq(modval, 2));
vec_float4 pre = spu_mul(p, factor);
vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
vec_float4 res = spu_mul(pre, divf4(numr, denr));
res = ldexpf4(res, quot);
return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
zeros,
zeromask);
}
/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
vec_uchar16 neg = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 res = exp2f4(spu_mul(spu_splats((float)0.3333333333333f), log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), neg);
return res;
}
*/
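The __calcQuot macro above is an exponent-divide-by-3 without a divide instruction; the same shift-and-correct idea as a scalar sketch (illustrative; assumes arithmetic right shifts on signed ints):

static int div3_sketch(int n)
{
    if (n < 0) n += 2;              /* bias negatives, as in the macro */
    int q = (n >> 2) + (n >> 4);    /* first terms of n/3 = n/4 + n/16 + ... */
    q += q >> 4;                    /* fold in the remaining series terms */
    q += q >> 8;
    q += q >> 16;
    int r = n - 3 * q;              /* residual of the estimate */
    q += (5 * r + 5) >> 4;          /* final correction step */
    return q;
}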

View File

@@ -0,0 +1,94 @@
/* ceild2 - for each of two double slots, round up to the smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ceild2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following components
 * based upon the inputs:
 *
 *   mask   = bits of the input that need to be replaced.
 *   insert = value of the bits that need to be replaced.
 *   addend = value to be added to perform the function.
 *
 * These are applied as follows:
 *
 *   out = ((in & mask) | insert) + addend
 */
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -31.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) && (in >= 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this would also work
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_and(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel(spu_splats((unsigned int)0x0), (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_and( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert = spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
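The mask/insert/addend decomposition can be seen more plainly on the raw IEEE-754 bits in a scalar sketch (illustrative only; memcpy type punning replaces the SPU shuffles and carry intrinsics):

#include <stdint.h>
#include <string.h>

static double ceild_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int exp = (int)((bits >> 52) & 0x7FF) - 1023;
    if (exp >= 52)                       /* already integral, inf, or NaN */
        return x;
    if (exp < 0)                         /* |x| < 1: result is -0.0, 0.0, or 1.0 */
        return (bits >> 63) ? -0.0 : (x == 0.0 ? x : 1.0);
    uint64_t frac_mask = 0x000FFFFFFFFFFFFFull >> exp;   /* "mask" */
    if ((bits & frac_mask) == 0)
        return x;                        /* no fraction bits set */
    if (!(bits >> 63))
        bits += frac_mask + 1;           /* "addend": carry up for positive x */
    bits &= ~frac_mask;                  /* clear the fraction ("insert" zeros) */
    memcpy(&x, &bits, sizeof bits);
    return x;
}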

View File

@@ -0,0 +1,54 @@
/* ceilf4 - for each of four float slots, round up to smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ceilf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one greater.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is less than input, add one.
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}
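Scalar sketch of the same truncate-then-bump approach (illustrative; 8388608.0f is 2^23, the 0x4b000000 threshold above):

#include <math.h>

static float ceilf_sketch(float x)
{
    if (!(fabsf(x) < 8388608.0f))   /* out of range (or NaN): already integral */
        return x;
    float t = (float)(int)x;        /* truncate toward zero, as spu_convts does */
    return (x > t) ? t + 1.0f : t;  /* bump by one if truncation went down */
}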

View File

@@ -0,0 +1,39 @@
/* copysignd2 - for each of two double slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double copysignd2 (vector double x, vector double y)
{
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,39 @@
/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
copysignf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,127 @@
/* cosd2 - Computes the cosine of each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector double
cosd2 (vector double x)
{
vec_double2 xl,xl2,xl3,res;
vec_double2 nan = (vec_double2)spu_splats(0x7ff8000000000000ull);
vec_uchar16 copyEven = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_double2 tiny = (vec_double2)spu_splats(0x3e40000000000000ull);
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236758134307553505349005744));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5),xl,spu_splats(0x8000000000000000ull)));
vec_float4 xf = spu_roundtf(xl);
vec_int4 q = spu_convts(xf,0);
q = spu_shuffle(q,q,copyEven);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1), spu_and(q,spu_splats(0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_double2 qd = spu_extend(qf);
vec_double2 p1 = spu_nmsub(qd,spu_splats(_SINCOS_KC1D),x);
xl = spu_nmsub(qd,spu_splats(_SINCOS_KC2D),p1);
// Check if |xl| is a really small number
//
vec_double2 absXl = (vec_double2)spu_andc((vec_ullong2)xl, spu_splats(0x8000000000000000ull));
vec_ullong2 isTiny = (vec_ullong2)isgreaterd2(tiny,absXl);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((((((c0 * xl2 + c1) * xl2 + c2) * xl2 + c3) * xl2 + c4) * xl2 + c5), and
// sx = xl + xl3 * (((((s0 * xl2 + s1) * xl2 + s2) * xl2 + s3) * xl2 + s4) * xl2 + s5)
//
vec_double2 ct0 = spu_mul(xl2,xl2);
vec_double2 ct1 = spu_madd(spu_splats(_SINCOS_CC0D),xl2,spu_splats(_SINCOS_CC1D));
vec_double2 ct2 = spu_madd(spu_splats(_SINCOS_CC2D),xl2,spu_splats(_SINCOS_CC3D));
vec_double2 ct3 = spu_madd(spu_splats(_SINCOS_CC4D),xl2,spu_splats(_SINCOS_CC5D));
vec_double2 st1 = spu_madd(spu_splats(_SINCOS_SC0D),xl2,spu_splats(_SINCOS_SC1D));
vec_double2 st2 = spu_madd(spu_splats(_SINCOS_SC2D),xl2,spu_splats(_SINCOS_SC3D));
vec_double2 st3 = spu_madd(spu_splats(_SINCOS_SC4D),xl2,spu_splats(_SINCOS_SC5D));
vec_double2 ct4 = spu_madd(ct2,ct0,ct3);
vec_double2 st4 = spu_madd(st2,ct0,st3);
vec_double2 ct5 = spu_mul(ct0,ct0);
vec_double2 ct6 = spu_madd(ct5,ct1,ct4);
vec_double2 st6 = spu_madd(ct5,st1,st4);
vec_double2 cx = spu_madd(ct6,xl2,spu_splats(1.0));
vec_double2 sx = spu_madd(st6,xl3,xl);
// Small angle approximation: sin(tiny) = tiny, cos(tiny) = 1.0
//
sx = spu_sel(sx,xl,isTiny);
cx = spu_sel(cx,spu_splats(1.0),isTiny);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_ullong2 mask1 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 1 or 2
//
vec_ullong2 mask2 = (vec_ullong2)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
mask2 = spu_shuffle(mask2,mask2,copyEven);
res = spu_sel((vec_double2)spu_xor(spu_splats(0x8000000000000000ull),(vec_ullong2)res),res,mask2);
// if input = +/-Inf return NAN
//
res = spu_sel(res, nan, isnand2 (x));
// if input = 0 or denorm, return 1.0
//
vec_ullong2 zeroMask = is0denormd2 (x);
res = spu_sel(res,spu_splats(1.0),zeroMask);
return res;
}

View File

@@ -0,0 +1,94 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "sincos_c.h"
vector float
cosf4 (vector float x)
{
vec_float4 xl,xl2,xl3,res;
vec_int4 q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = spu_mul(x, spu_splats(0.63661977236f));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(xl))*sign(xl))
//
xl = spu_add(xl,spu_sel(spu_splats(0.5f),xl,spu_splats(0x80000000)));
q = spu_convts(xl,0);
// Compute an offset based on the quadrant that the angle falls in
//
vec_int4 offset = spu_add(spu_splats(1),spu_and(q,spu_splats((int)0x3)));
// Remainder in range [-pi/4..pi/4]
//
vec_float4 qf = spu_convtf(q,0);
vec_float4 p1 = spu_nmsub(qf,spu_splats(_SINCOS_KC1),x);
xl = spu_nmsub(qf,spu_splats(_SINCOS_KC2),p1);
// Compute x^2 and x^3
//
xl2 = spu_mul(xl,xl);
xl3 = spu_mul(xl2,xl);
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
//
vec_float4 ct1 = spu_madd(spu_splats(_SINCOS_CC0),xl2,spu_splats(_SINCOS_CC1));
vec_float4 st1 = spu_madd(spu_splats(_SINCOS_SC0),xl2,spu_splats(_SINCOS_SC1));
vec_float4 ct2 = spu_madd(ct1,xl2,spu_splats(_SINCOS_CC2));
vec_float4 st2 = spu_madd(st1,xl2,spu_splats(_SINCOS_SC2));
vec_float4 cx = spu_madd(ct2,xl2,spu_splats(1.0f));
vec_float4 sx = spu_madd(st2,xl3,xl);
// Use the cosine when the offset is odd and the sin
// when the offset is even
//
vec_uchar16 mask1 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x1),spu_splats((int)0));
res = spu_sel(cx,sx,mask1);
// Flip the sign of the result when (offset mod 4) = 1 or 2
//
vec_uchar16 mask2 = (vec_uchar16)spu_cmpeq(spu_and(offset,(int)0x2),spu_splats((int)0));
res = spu_sel((vec_float4)spu_xor(spu_splats(0x80000000),(vec_uint4)res),res,mask2);
return res;
}
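Structurally, the routine does the following (scalar sketch, illustrative: sinf/cosf stand in for the polynomial core, and a single-step reduction replaces the two-constant _SINCOS_KC1/_SINCOS_KC2 reduction):

#include <math.h>

static float cosf_sketch(float x)
{
    float xl = x * 0.63661977236f;        /* angle * 2/pi */
    xl += (xl >= 0.0f) ? 0.5f : -0.5f;    /* round to nearest quadrant */
    int q = (int)xl;
    int offset = 1 + (q & 3);             /* cos(x) = sin(x + pi/2) */
    float r = x - (float)q * 1.57079632679489661923f;  /* remainder in [-pi/4, pi/4] */
    float core = (offset & 1) ? cosf(r) : sinf(r);
    return (offset & 2) ? -core : core;   /* flip sign when (offset mod 4) = 2 or 3 */
}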

View File

@@ -0,0 +1,41 @@
/* divd2 - for each of two double slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
divd2 (vector double numer, vector double denom)
{
return spu_mul( numer, recipd2( denom ) );
}

View File

@@ -0,0 +1,46 @@
/* divf4 - for each of four float slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Uses constant of 1.0 + 1 ulp to improve accuracy.
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}
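The refinement step is one Newton-Raphson iteration on the reciprocal; as a scalar sketch (illustrative; 1.0f/d stands in for the spu_re hardware estimate):

static float divf_sketch(float n, float d)
{
    float y0 = 1.0f / d;                    /* reciprocal estimate */
    float oneish = 1.0f + 1.1920929e-7f;    /* 0x3f800001: 1.0 plus 1 ulp */
    float q0 = n * y0;
    /* q = q0 + q0*(oneish - d*y0): cancels the estimate's error */
    return (oneish - d * y0) * q0 + q0;
}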

View File

@@ -0,0 +1,109 @@
/* divi4 - for each of four integer slots, compute quotient and remainder of numer/denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divi4_t divi4 (vector signed int numer, vector signed int denom)
{
divi4_t res;
vec_int4 quot, newQuot, shift;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denomAbs );
numerZeros = spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denomAbs, (vec_uint4)shift );
oneShifted = spu_sl( (vec_uint4)spu_splats(1), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, (vec_uint4)spu_splats(0), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denomAbs, denomZeros );
oneLeft = spu_sl( (vec_uint4)spu_splats(1), denomZeros );
quot = spu_splats(0);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, (vec_int4)oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numerAbs );
newNum = spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel( quot, spu_sub( 0, quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)numerAbs ), (vec_int4)numerAbs, numerPos );
return res;
}

View File

@@ -0,0 +1,97 @@
/* divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divu4_t divu4 (vector unsigned int numer, vector unsigned int denom)
{
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
quot = spu_splats(0U);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numer );
newNum = spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
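One slot of the loop, as a scalar sketch (illustrative; __builtin_clz is GCC-specific, and this walks one bit per iteration where the SPU version skips ahead by leading-zero counts):

static void udiv_sketch(unsigned int n, unsigned int d,
                        unsigned int *quot, unsigned int *rem)
{
    unsigned int q = 0;
    if (d != 0) {
        /* align the divisor's leading one with the dividend's */
        int shift = __builtin_clz(d) - __builtin_clz(n | 1);
        while (shift >= 0) {
            if ((d << shift) <= n) {     /* subtract when it fits */
                n -= d << shift;
                q |= 1u << shift;
            }
            shift--;
        }
    }
    *quot = q;
    *rem  = n;    /* divide by zero: quotient 0, remainder = numerator */
}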

View File

@@ -0,0 +1,131 @@
/* exp2f4 - for each of four float slots, compute 2 raised to x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
 * FUNCTION
 *	vec_float4 _exp2_v(vec_float4 x)
 *
 * DESCRIPTION
 *	_exp2_v computes 2 raised to the input vector x. Computation is
 *	performed by observing that 2^(a+b) = 2^a * 2^b.
 *	We decompose x into a and b (above) by letting
 *	a = ceil(x), b = x - a.
 *
 *	2^a is easily computed by placing a into the exponent
 *	of a floating point number whose mantissa is all zeros.
 *
 *	2^b is computed using the following polynomial approximation
 *	(C. Hastings, Jr, 1955):
 *
 *	           __7__
 *	           \
 *	            \
 *	2^(-x) =    /    Ci*x^i
 *	           /____
 *	            i=0
 *
 *	for x in the range 0.0 to 1.0, where
 *
 *	C0 =  1.0
 *	C1 = -0.9999999995
 *	C2 =  0.4999999206
 *	C3 = -0.1666653019
 *	C4 =  0.0416573475
 *	C5 = -0.0083013598
 *	C6 =  0.0013298820
 *	C7 = -0.0001413161
 *
 *	This function does not handle out-of-range conditions. It
 *	assumes that x is in the range (-128.0, 127.0]. Values outside
 *	this range will produce undefined results.
 */
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
vector float
exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow, underflow;
vec_float4 frac, frac2, frac4;
vec_float4 exp_int, exp_frac;
vec_float4 result;
vec_float4 hi, lo;
vec_float4 bias;
/* Break the input x into two parts: ceil(x) and x - ceil(x).
*/
bias = (vec_float4)(spu_rlmaska((vec_int4)(x), -31));
bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats(_EXP2F_H_LN2));
// !!! HRD: replacing unclear and incorrect overflow handling code
//overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
overflow = spu_cmpgt(x, (vec_float4)spu_splats(0x4300FFFFu)); // !!! Biggest possible exponent to fit in range.
underflow = spu_cmpgt(spu_splats(-126.0f), x);
//exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
exp_int = spu_add(exp_int, exp_int); // !!! HRD
/* Instruction counts can be reduced if the polynomial is
 * computed entirely from nested (dependent) fma's. However,
 * to reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
 */
frac2 = spu_mul(frac, frac);
frac4 = spu_mul(frac2, frac2);
hi = spu_madd(frac, spu_splats(-0.0001413161f), spu_splats(0.0013298820f));
hi = spu_madd(frac, hi, spu_splats(-0.0083013598f));
hi = spu_madd(frac, hi, spu_splats(0.0416573475f));
lo = spu_madd(frac, spu_splats(-0.1666653019f), spu_splats(0.4999999206f));
lo = spu_madd(frac, lo, spu_splats(-0.9999999995f));
lo = spu_madd(frac, lo, spu_splats(1.0f));
exp_frac = spu_madd(frac4, hi, lo);
ix = spu_add(ix, spu_rlmask((vec_int4)(exp_frac), -23));
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), (vec_uchar16)overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), (vec_uchar16)underflow);
//result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
return (result);
}
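A scalar sketch of the decomposition (illustrative; ceilf/ldexpf stand in for the bias trick and the exponent-field construction, and the overflow/underflow clamps are omitted):

#include <math.h>

static float exp2f_sketch(float x)
{
    int a = (int)ceilf(x);                          /* 2^x = 2^a * 2^(x-a) */
    float t = ((float)a - x) * 0.69314718055995f;   /* (a-x)*ln2, in [0, ln2) */
    /* hi/lo halves of the polynomial for 2^-(a-x), as in exp2f4 */
    float hi = ((-0.0001413161f * t + 0.0013298820f) * t
                - 0.0083013598f) * t + 0.0416573475f;
    float lo = ((-0.1666653019f * t + 0.4999999206f) * t
                - 0.9999999995f) * t + 1.0f;
    float t2 = t * t;
    float exp_frac = hi * (t2 * t2) + lo;
    return ldexpf(exp_frac, a);                     /* scale by 2^a */
}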

View File

@@ -0,0 +1,63 @@
/* expf4 - for each of four float slots, compute e raised to x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
vector float
expf4 (vector float x)
{
vec_uchar16 xnegmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats((float) 0.5f),spu_splats((float)-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(_EXPF_H_INVLN2));
vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
g = spu_convtf(xexp, 0);
g = spu_madd(g, spu_splats(_EXPF_H_C2), spu_madd(g, spu_splats(_EXPF_H_C1), x));
vec_float4 z = spu_mul(g, g);
vec_float4 a = spu_mul(z, spu_splats((float)0.0999748594f));
vec_float4 b = spu_mul(g,
spu_madd(z,
spu_splats((float)0.0083208258f),
spu_splats((float)0.4999999992f)
)
);
vec_float4 foo = divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
return ldexpf4(foo, xexp);
}
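For reference, a scalar sketch of the same scheme (illustrative only; expf_sketch is an assumed name): reduce x = g + n*ln(2) using the split constants C1/C2, then evaluate the rational (Pade-style) approximation of e^g.

#include <math.h>

static float expf_sketch(float x)
{
    /* round x/ln(2) to the nearest integer, truncating like spu_convts */
    int   n = (int)(x * 1.4426950408889634f + (x < 0.0f ? -0.5f : 0.5f));
    float g = x - n * 0.6931470632553101f;  /* subtract hi part of ln(2) */
    g       = g - n * 1.1730463525082e-7f;  /* then the lo part          */
    float z = g * g;
    float a = z * 0.0999748594f;
    float b = g * (0.0083208258f * z + 0.4999999992f);
    return ldexpf((1.0f + (a + b)) / (1.0f + (a - b)), n);
}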

View File

@@ -0,0 +1,54 @@
/* expm1f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
vector float
expm1f4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(_EXPM1F_H_ln1by2)),
spu_cmpgt(spu_splats(_EXPM1F_H_ln3by2), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)-0.3203561199f), spu_splats((float)0.9483177697f));
d1 = spu_madd(x2, spu_splats((float) 0.0326527809f), d0);
n0 = spu_madd(x , spu_splats((float)0.1538026623f), spu_splats((float)0.9483177732f));
n1 = spu_madd(x , spu_splats((float)0.0024490478f), spu_splats((float)0.0305274668f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(spu_sub(expf4(x), spu_splats(1.0f)),
spu_mul(x, divf4(n1, d1)),
nearzeromask);
}
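A scalar sketch of the selection above (illustrative; expm1f_sketch is an assumed name): inside (ln(1/2), ln(3/2)) use the rational approximation x*n(x)/d(x), which avoids the cancellation of exp(x) - 1 near zero; elsewhere fall back to exp(x) - 1.

#include <math.h>

static float expm1f_sketch(float x)
{
    if (x > -0.6931471805599f && x < 0.4054651081082f) {
        float x2 = x * x;
        float d  = 0.0326527809f * x2 + (-0.3203561199f * x + 0.9483177697f);
        float n  = (0.0024490478f * x + 0.0305274668f) * x2
                 + (0.1538026623f * x + 0.9483177732f);
        return x * (n / d);
    }
    return expf(x) - 1.0f;
}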

View File

@@ -0,0 +1,37 @@
/* fabsd2 - for each of two double slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double fabsd2 (vector double x)
{
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,37 @@
/* fabsf4 - for each of 4 float slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float fabsf4 (vector float x)
{
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,46 @@
/* fdimd2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* fdim_v - compute the positive difference of x and y.
*/
vector double
fdimd2 (vector double x, vector double y)
{
vec_double2 v;
vec_uint4 mask;
v = spu_sub(x, y);
mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31));
return (v);
}
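The shuffle/rlmaska pair above just broadcasts the sign of each doubleword difference and masks negative results to zero; a scalar reference (illustrative, NaN handling aside):

static double fdim_sketch(double x, double y)
{
    return (x > y) ? x - y : 0.0;   /* positive difference, else +0 */
}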

View File

@@ -0,0 +1,38 @@
/* fdimf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fdimf4 (vector float x, vector float y)
{
vec_float4 diff = spu_sub(x,y);
return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}

View File

@@ -0,0 +1,94 @@
/* floord2 - for each of two double slots, round down to the largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
floord2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following component
* based upon the inputs.
*
* mask = bits of the input that need to be replaced.
* insert = value of the bits that need to be replaced
* addend = value to be added to perform function.
*
 * These are applied as follows:
*
* out = ((in & mask) | insert) + addend
*/
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -32.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) && (in < 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this also works
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel((vec_uint4)one_d, zero, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_andc( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
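A worked scalar sketch of the mask/insert/addend scheme above, assuming a 64-bit IEEE-754 layout (floor_sketch is an illustrative name, not part of the library):

#include <stdint.h>
#include <string.h>

static double floor_sketch(double in)
{
    uint64_t bits;
    memcpy(&bits, &in, sizeof bits);
    int neg = (int)(bits >> 63);
    int exp = (int)((bits >> 52) & 0x7FF);
    if (exp >= 0x3FF + 52)                      /* no fraction bits left */
        return in;                              /* (also Inf and NaN)    */
    if (exp < 0x3FF) {                          /* |in| < 1              */
        int nonzero = (bits & 0x7FFFFFFFFFFFFFFFull) != 0;
        return (neg && nonzero) ? -1.0 : (neg ? -0.0 : 0.0);
    }
    uint64_t mask   = 0x000FFFFFFFFFFFFFull >> (exp - 0x3FF);
    uint64_t addend = (neg && (bits & mask)) ? mask + 1 : 0;
    bits = (bits & ~mask) + addend;             /* clear fraction, step  */
    memcpy(&in, &bits, sizeof in);              /* down if negative      */
    return in;
}

For example, floor(-1.5): clearing the fraction bits leaves the pattern of -1.0, and adding mask+1 carries into the exponent field, giving -2.0.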

View File

@@ -0,0 +1,54 @@
/* floorf4 - for each of four float slots, round down to largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
floorf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one less.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is greater than input, subtract one.
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}
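A scalar sketch of the branchless idea above (floorf_sketch is an illustrative name): truncate, then step down by one exactly when truncation rounded up, i.e. for negative non-integral inputs.

static float floorf_sketch(float x)
{
    if (!(x < 0x1p23f && x > -0x1p23f))   /* 2^23 and up are integral; */
        return x;                         /* also passes NaN through   */
    float t = (float)(int)x;              /* truncate toward zero      */
    return (t > x) ? t - 1.0f : t;
}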

View File

@@ -0,0 +1,37 @@
/* fmad2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
fmad2 (vector double x, vector double y, vector double z)
{
return spu_madd(x,y,z);
}

View File

@@ -0,0 +1,38 @@
/* fmaf4
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaf4 (vector float x, vector float y, vector float z)
{
return spu_madd(x,y,z);
}

View File

@@ -0,0 +1,68 @@
/* fmaxd2 - for each of two double slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the maximum numeric value of their arguments. If one argument
* is a NaN, fmax returns the other value. If both are NaNs, then a NaN
* is returned.
*/
vector double
fmaxd2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set 0x3ff to exponent
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as max
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(x_offset, y_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}
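Illustrative usage of the NaN rule described above (values assumed; __builtin_nan is the GCC builtin):

vec_double2 a = (vec_double2){ __builtin_nan(""), 2.0 };
vec_double2 b = (vec_double2){ 1.0, __builtin_nan("") };
vec_double2 m = fmaxd2(a, b);   /* {1.0, 2.0}: a NaN loses to a number */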

View File

@@ -0,0 +1,40 @@
/* fmaxf4 - for each of four float slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fmaxf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( y, x ) );
}

View File

@@ -0,0 +1,67 @@
/* fmind2 - for each of two double slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the minimum numeric value of their arguments. If one argument
* is a NaN, fmin returns the other value. If both are NaNs, then a NaN
* is returned.
*/
vector double
fmind2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set 0x3ff to exponent
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as min
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(y_offset, x_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}

View File

@@ -0,0 +1,40 @@
/* fminf4 - for each of four float slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
fminf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( x, y ) );
}

View File

@@ -0,0 +1,302 @@
/* fmodd2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* a vector is returned that contains the remainder of xi/yi,
 * for corresponding elements of vector double x and vector double y,
* as described below:
* if yi is 0, the result is 0
 * otherwise, the function determines the unique signed integer value i
* such that the returned element is xi - i * yi with the same sign as xi and
* magnitude less than |yi|
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb);
vector double fmodd2(vector double x, vector double y)
{
int shift0, shift1;
vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
vec_uchar16 propagate = (vec_uchar16){4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192};
vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11};
vec_uchar16 merge = (vec_uchar16){8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31};
vec_int4 n, shift, power;
vec_uint4 z;
vec_uint4 x_hi, y_hi;
vec_uint4 abs_x, abs_y;
vec_uint4 exp_x, exp_y;
vec_uint4 zero_x, zero_y;
vec_uint4 mant_x, mant_x0, mant_x1, mant_y ;
vec_uint4 norm, denorm, norm0, norm1, denorm0, denorm1;
vec_uint4 result, result0, resultx, cnt, sign, borrow, mask;
vec_uint4 x_7ff, x_inf, x_nan, y_7ff, y_inf, y_nan, is_normal;
vec_uint4 x_is_norm, y_is_norm, frac_x, frac_y, cnt_x, cnt_y, mant_x_norm, mant_y_norm;
vec_uint4 mant_x_denorm0, mant_x_denorm1, mant_x_denorm;
vec_uint4 mant_y_denorm0, mant_y_denorm1, mant_y_denorm;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
sign = spu_and((vec_uint4)x, sign_mask);
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
x_hi = spu_shuffle(abs_x, abs_x, splat_hi);
y_hi = spu_shuffle(abs_y, abs_y, splat_hi);
exp_x = spu_rlmask(x_hi, -20);
exp_y = spu_rlmask(y_hi, -20);
// |y| > |x|
resultx = _vec_gt64(abs_y, abs_x);
//is Inf, is NaN
x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
x_inf = _vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_nan = spu_andc(x_7ff, x_inf);
y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
y_inf = _vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_nan = spu_andc(y_7ff, y_inf);
// is zero
zero_x = _vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = _vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
/* Determine ilogb of abs_x and abs_y and
* extract the mantissas (mant_x, mant_y)
*/
/* change form*/
// exponent field of 0 -> !is_normal
// zero inputs are don't-cares here: (x==0, y!=0) falls to the x<y path, (x!=0, y==0) to the y==0 path, and (x==0, y==0) to the resultx path
x_is_norm = spu_cmpgt(x_hi, spu_splats((unsigned int)0x000fffff));
y_is_norm = spu_cmpgt(y_hi, spu_splats((unsigned int)0x000fffff));
frac_x = spu_and((vec_uint4)x, mant_mask);
frac_y = spu_and((vec_uint4)y, mant_mask);
//cntlz(use when denorm)
cnt_x = spu_cntlz(frac_x);
cnt_x = spu_add(cnt_x, spu_and(spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_x = spu_add(spu_shuffle(cnt_x, cnt_x, splat_hi), -11);
cnt_y = spu_cntlz(frac_y);
cnt_y = spu_add(cnt_y, spu_and(spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
/*
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
*/
//norm
mant_x_norm = spu_or(implied_1, frac_x);
mant_y_norm = spu_or(implied_1, frac_y);
//denorm
shift0 = spu_extract(cnt_x, 0);
shift1 = spu_extract(cnt_x, 2);
mant_x_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_x, -8);
mant_x_denorm1 = spu_and((vec_uint4)frac_x, ((vec_uint4){0x0,0x0,-1,-1}));
mant_x_denorm0 = spu_slqwbytebc(spu_slqw(mant_x_denorm0, shift0), shift0);
mant_x_denorm1 = spu_slqwbytebc(spu_slqw(mant_x_denorm1, shift1), shift1);
mant_x_denorm = spu_shuffle(mant_x_denorm0, mant_x_denorm1, merge);
// vec_int4 shift_y = (vec_int4)spu_sub(cnt_y, spu_splats((unsigned int)11));
shift0 = spu_extract(cnt_y, 0);
shift1 = spu_extract(cnt_y, 2);
mant_y_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_y, -8);
mant_y_denorm1 = spu_and((vec_uint4)frac_y, ((vec_uint4){0x0,0x0,-1,-1}));
mant_y_denorm0 = spu_slqwbytebc(spu_slqw(mant_y_denorm0, shift0), shift0);
mant_y_denorm1 = spu_slqwbytebc(spu_slqw(mant_y_denorm1, shift1), shift1);
mant_y_denorm = spu_shuffle(mant_y_denorm0, mant_y_denorm1, merge);
// mant_x, mant_y( norm | denorm )
mant_x = spu_sel(mant_x_denorm, mant_x_norm, x_is_norm);
mant_y = spu_sel(mant_y_denorm, mant_y_norm, y_is_norm);
/* power
*/
vec_int4 power_x_norm = (vec_int4)exp_x;
vec_int4 power_x_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_x);
vec_int4 power_x = spu_sel(power_x_denorm, power_x_norm, x_is_norm);
vec_int4 power_y_norm = (vec_int4)exp_y;
vec_int4 power_y_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_y);
vec_int4 power_y = spu_sel(power_y_denorm, power_y_norm, y_is_norm);
/* Compute fixed point fmod of mant_x and mant_y. Set the flag,
* result0, to all ones if we detect that the final result is
* ever 0.
*/
result0 = spu_or(zero_x, zero_y);
// n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); //zhao--
n = spu_sub(power_x, power_y);
mask = spu_cmpgt(n, 0);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
/* Convert the result back to floating point and restore
* the sign. If we flagged the result to be zero (result0),
* zero it. If we flagged the result to equal its input x,
* (resultx) then return x.
*
 * Double precision can produce a denormalized output.
*/
// normal = spu_cmpgt((vec_int4)exp_y, 0);//zhao--
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_and(spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(spu_shuffle(cnt, cnt, splat_hi), -11);
mant_x0 = spu_rlmaskqwbyte(mant_x, -8);
mant_x1 = spu_and(mant_x,((vec_uint4){0x0,0x0,-1,-1}));
power =spu_sub(power_y, (vec_int4)cnt);
is_normal = spu_cmpgt(power, 0);
//norm
shift0 = spu_extract(cnt, 0);
shift1 = spu_extract(cnt, 2);
/*
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
*/
norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
norm = spu_shuffle(norm0, norm1, merge);
//denorm
/*
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
*/
shift = spu_add(power, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
// printf("result denorm: shift0=%d, shift1=%d\n",shift0, shift1);
denorm0 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm0, shift0), 7+shift0);
denorm1 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm1, shift1), 7+shift1);
denorm = spu_shuffle(denorm0, denorm1, merge);
// merge
mant_x = spu_sel(denorm, norm, is_normal);
exp_y = (vec_uint4)power;
exp_y = spu_and(spu_rl(exp_y, 20), is_normal);
result = spu_sel(exp_y, spu_or(sign, mant_x),((vec_uint4){0x800FFFFF, -1, 0x800FFFFF, -1}));
//y>x || y<=x
result = spu_sel(spu_andc(result, spu_rlmask(result0, -1)),
(vec_uint4)x, resultx);
//y=+-inf => x
result = spu_sel(result, (vec_uint4)x, y_inf);
//x=+-inf => NaN
result = spu_sel(result, ((vec_uint4){0x7ff80000, 0x0, 0x7ff80000, 0x0}), x_inf);
//y=0 => 0
result = spu_andc(result, zero_y);
//x=NaN or y=NaN => propagate the NaN
result = spu_sel(result, (vec_uint4)x, x_nan);
result = spu_sel(result, (vec_uint4)y, y_nan);
return ((vec_double2)result);
}
/*
* extend spu_cmpgt function to 64bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid only in the high word of each doubleword
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // result valid only in the high word so far
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4 _vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
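The while loop in fmodd2 above is a classic shift-and-subtract remainder on the two fixed-point mantissas; a scalar sketch of just that inner step (mant_fmod_sketch is an illustrative name):

#include <stdint.h>

static uint64_t mant_fmod_sketch(uint64_t mant_x, uint64_t mant_y, int n)
{
    /* n = unbiased exponent of x minus unbiased exponent of y */
    while (n-- > 0) {
        if (mant_x >= mant_y)
            mant_x -= mant_y;     /* conditional subtract ...        */
        mant_x <<= 1;             /* ... then bring in the next bit  */
    }
    if (mant_x >= mant_y)
        mant_x -= mant_y;         /* final subtract without a shift  */
    return mant_x;                /* remainder, still to be rescaled */
}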

View File

@@ -0,0 +1,86 @@
/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
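A scalar sketch of the candidate selection described above (fmodf_sketch is an assumed name): the rounded quotient can be off by one in either direction, so probe i-1, i, i+1 and keep the largest i with |x| - i*|y| >= 0.

#include <math.h>

static float fmodf_sketch(float x, float y)
{
    float q = x / y;
    float i = truncf(fabsf(q));
    if (i > 0.0f && fabsf(x) - i * fabsf(y) < 0.0f)
        i -= 1.0f;                               /* quotient too large */
    else if (fabsf(x) - (i + 1.0f) * fabsf(y) >= 0.0f)
        i += 1.0f;                               /* quotient too small */
    return x - copysignf(i, q) * y;
}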

View File

@@ -0,0 +1,94 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed long long
fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
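The even/odd shuffles above just stitch 32-bit compare results into 64-bit predicates; the underlying classification is the usual exponent/mantissa test, sketched here for one scalar double (fpclassify_sketch is an assumed name):

#include <math.h>
#include <stdint.h>
#include <string.h>

static int fpclassify_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t expn = bits & 0x7FF0000000000000ull;
    uint64_t mant = bits & 0x000FFFFFFFFFFFFFull;
    if (expn == 0x7FF0000000000000ull)           /* all-ones exponent */
        return mant ? FP_NAN : FP_INFINITE;
    if (expn == 0)                               /* zero exponent     */
        return mant ? FP_SUBNORMAL : FP_ZERO;
    return FP_NORMAL;
}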

View File

@@ -0,0 +1,78 @@
/* fpclassifyf4 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_NAN
#define FP_NAN (0)
#endif
#ifndef FP_INFINITE
#define FP_INFINITE (1)
#endif
#ifndef FP_ZERO
#define FP_ZERO (2)
#endif
#ifndef FP_SUBNORMAL
#define FP_SUBNORMAL (3)
#endif
#ifndef FP_NORMAL
#define FP_NORMAL (4)
#endif
vector signed int
fpclassifyf4 (vector float x)
{
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
return classtype;
}

View File

@@ -0,0 +1,95 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
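A scalar sketch of the decomposition above, assuming a 64-bit IEEE-754 layout (frexpd_sketch is an illustrative name): pre-scale subnormals so they acquire a usable exponent field, then force the stored exponent to that of 0.5, so that x = frac * 2^exp with |frac| in [0.5, 1).

#include <stdint.h>
#include <string.h>

static double frexpd_sketch(double x, long long *pexp)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    uint64_t mag = bits & 0x7FFFFFFFFFFFFFFFull;
    if (mag == 0 || mag >= 0x7FF0000000000000ull) {
        *pexp = 0;   /* the vector code leaves *pexp as-is for NaN/Inf */
        return x;    /* zero, NaN, and Inf pass through as the frac    */
    }
    int e = (int)(mag >> 52);
    if (e == 0) {                 /* subnormal: normalize via 2^54     */
        x *= 0x1p54;
        memcpy(&bits, &x, sizeof bits);
        e = (int)((bits >> 52) & 0x7FF) - 54;
    }
    *pexp = e - 1022;
    bits = (bits & 0x800FFFFFFFFFFFFFull) | 0x3FE0000000000000ull;
    memcpy(&x, &bits, sizeof x);
    return x;
}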

View File

@@ -0,0 +1,47 @@
/* frexpf4 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
return spu_sel(m2, (vec_float4)zeros, zeromask);
}
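
For reference, a scalar sketch of what each float slot computes (an illustrative assumption, not part of the library): the exponent field is shifted down and rebased, and the exponent of 0.5 is spliced into the original sign and mantissa. The zero case follows the SPU convention seen above, where denormal inputs also compare equal to zero.

#include <string.h>

static float frexpf_ref(float x, int *e)
{
    unsigned int u;
    memcpy(&u, &x, sizeof u);
    if ((u & 0x7F800000u) == 0) {   /* zero (or denormal, flushed on SPU) */
        *e = 0;
        return 0.0f;
    }
    *e = (int)((u >> 23) & 0xFFu) - 126;
    u = (u & ~0x7F800000u) | 0x3F000000u;  /* splice in the exponent of 0.5 */
    memcpy(&x, &u, sizeof x);
    return x;   /* magnitude now in [0.5, 1) */
}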

View File

@@ -0,0 +1,40 @@
/* hypotd2 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtd2(sum);
}

View File

@@ -0,0 +1,40 @@
/* hypotf4 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return sqrtf4(sum);
}
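
A quick spot check with Pythagorean triples (hypothetical usage, not part of the commit). Note that both hypot variants square their inputs directly with no rescaling, so inputs of very large or very small magnitude may overflow or underflow the intermediate sum.

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_float4 hypot_demo(void)
{
    vec_float4 x = (vec_float4){ 3.0f, 5.0f, 8.0f, 7.0f };
    vec_float4 y = (vec_float4){ 4.0f, 12.0f, 15.0f, 24.0f };
    return hypotf4(x, y);   /* expected { 5, 13, 17, 25 } */
}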

View File

@@ -0,0 +1,84 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN ((int)0x7FFFFFFF)
#endif
vector signed long long
ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
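
A scalar reference for one double slot (an illustrative sketch, not part of the library). As the comments above show, this implementation returns FP_ILOGB0 for infinity as well as for zero, which differs from C99 ilogb, where infinity yields INT_MAX.

#include <math.h>

static long long ilogb_ref(double x)
{
    if (isnan(x))              return FP_ILOGBNAN;
    if (x == 0.0 || isinf(x))  return FP_ILOGB0;  /* zero and +/-Inf alike */
    return ilogb(x);   /* unbiased exponent, exact for subnormals too */
}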

View File

@@ -0,0 +1,48 @@
/* ilogbf4 - for each element of vector x, return integer exponent of x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef FP_ILOGB0
#define FP_ILOGB0 ((int)0x80000001)
#endif
vector signed int
ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);
return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}
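
A hypothetical spot check (not from the commit), using the per-file FP_ILOGB0 fallback defined above:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 ilogb_demo(void)
{
    vec_float4 x = (vec_float4){ 1.0f, 8.0f, 0.75f, 0.0f };
    return ilogbf4(x);   /* expected { 0, 3, -1, FP_ILOGB0 } */
}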

View File

@@ -0,0 +1,39 @@
/* irintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode.
On SPU, the rounding mode for float is always towards zero.
A vector signed int is returned.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int irintf4(vector float in)
{
return spu_convts(in,0);
}
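
Since spu_convts always converts with truncation, irintf4 is exactly round-toward-zero. A hypothetical spot check:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 irint_demo(void)
{
    vec_float4 in = (vec_float4){ 1.9f, -1.9f, 2.5f, -2.5f };
    return irintf4(in);   /* truncation: { 1, -1, 2, -2 } */
}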

View File

@@ -0,0 +1,55 @@
/* iroundf4 - for each of four float slots, round to the nearest integer,
halfway cases are rounded away from zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
/* Add 0.5 (in fixed precision, to eliminate rounding issues). */
exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF));
addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp),
spu_cmpgt((vec_uint4)exp, -31));
in = (vec_float4)spu_add((vec_uint4)in, addend);
/* Truncate the result.
*/
out = spu_convts(in,0);
return (out);
}
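
The addend is 2^24 shifted right by an exponent-derived count, which in the value domain amounts to adding 0.5 at the input's scale before the truncating convert; slots whose exponents fall outside the useful range get a zero addend. A hypothetical spot check of the resulting round-half-away-from-zero behavior:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_int4 iround_demo(void)
{
    vec_float4 in = (vec_float4){ 1.5f, -1.5f, 2.4f, -2.6f };
    return iroundf4(in);   /* halfway away from zero: { 2, -2, 2, -3 } */
}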

View File

@@ -0,0 +1,46 @@
/* is0denormd2 - for each of two double slots, if input equals 0 or denorm return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
return cmp;
}

View File

@@ -0,0 +1,37 @@
/* is0denormf4 - for each element of vector x, return a mask of ones if x' is zero or denorm, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}
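
A hypothetical spot check (not from the commit); 1e-40f is a denormal in IEEE single precision, so its exponent field is zero:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_uint4 is0denorm_demo(void)
{
    vec_float4 x = (vec_float4){ 0.0f, -0.0f, 1.0f, 1e-40f };
    return is0denormf4(x);   /* expected { ~0u, ~0u, 0u, ~0u } */
}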

View File

@@ -0,0 +1,54 @@
/* isequald2 - for each of two double slots, if x = y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( isnand2( x ), isnand2( y ) ) );
}
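
Each slot implements the IEEE semantics of the scalar comparison below (a reference sketch, not part of the library): the bothzero term makes +0 and -0 compare equal even though their bit patterns differ, and the isnand2 term forces any comparison involving NaN to report false.

static unsigned long long isequal_ref(double x, double y)
{
    return (x == y) ? ~0ull : 0ull;   /* +0 == -0; NaN != everything */
}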

View File

@@ -0,0 +1,37 @@
/* isequalf4 - for each element of vector x and y, return a mask of ones if x' is equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
}

View File

@@ -0,0 +1,47 @@
/* isfinited2 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Finite unless NaN or Inf, check for 'not all-ones exponent'
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}

View File

@@ -0,0 +1,40 @@
/* isfinitef4 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isfinitef4 (vector float x)
{
(void)x;
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}

View File

@@ -0,0 +1,65 @@
/* isgreaterd2 - for each of two double slots, if x > y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}
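
The double comparison here is assembled entirely from 32-bit integer compares (presumably because the original SPU offers no double-precision compare instruction; that rationale is our assumption). A 64-bit greater-than is built from the high-word and low-word results, then swapped for less-than when both operands are negative, since the integer ordering of IEEE bit patterns reverses for negative values; the bothzero and isnand2 terms again pin down the +/-0 and NaN cases. Per slot this matches the scalar reference:

static unsigned long long isgreater_ref(double x, double y)
{
    return (x > y) ? ~0ull : 0ull;   /* false for NaN; +0 not greater than -0 */
}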

View File

@@ -0,0 +1,67 @@
/* isgreaterequald2 - for each of two double slots, if x is greater or equal to y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
}

View File

@@ -0,0 +1,41 @@
/* isgreaterequalf4 - for each element of vector x and y, return a mask of ones if x' is greater than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(y, x);
return spu_nor(var, var);
}
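
spu_nor(var, var) is simply bitwise NOT, so x >= y is computed as NOT(y > x). The complement is only valid because SPU single precision has no NaNs (see isnanf4 below); under full IEEE semantics a NaN operand would wrongly report true. A hypothetical spot check:

#include <simdmath.h>
#include <spu_intrinsics.h>

static vec_uint4 isge_demo(void)
{
    vec_float4 x = (vec_float4){ 1.0f, 2.0f, 3.0f, -1.0f };
    vec_float4 y = (vec_float4){ 2.0f, 2.0f, 1.0f, -2.0f };
    return isgreaterequalf4(x, y);   /* expected { 0u, ~0u, ~0u, ~0u } */
}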

View File

@@ -0,0 +1,37 @@
/* isgreaterf4 - for each element of vector x and y, return a mask of ones if x' is greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
}

View File

@@ -0,0 +1,47 @@
/* isinfd2 - for each of two double slots, if input equals +Inf or -Inf return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
return cmp;
}

View File

@@ -0,0 +1,40 @@
/* isinff4 - for each element of vector x, return a mask of ones if x' is INF, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isinff4 (vector float x)
{
(void)x;
// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,64 @@
/* islessd2 - for each of two double slots, if x < y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}

View File

@@ -0,0 +1,66 @@
/* islessequald2 - for each of two double slots, if x <= y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( isnand2 ( x ), isnand2 ( y ) ) );
}

View File

@@ -0,0 +1,41 @@
/* islessequalf4 - for each element of vector x and y, return a mask of ones if x' is less than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(x, y);
return spu_nor(var, var);
}

View File

@@ -0,0 +1,37 @@
/* islessf4 - for each element of vector x and y, return a mask of ones if x' is less than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}

View File

@@ -0,0 +1,55 @@
/* islessgreaterd2 - for each of two double slots, if x is less or greater than y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( isnand2 ( x ), isnand2 ( y ) ) ) );
}

View File

@@ -0,0 +1,41 @@
/* islessgreaterf4 - for each element of vector x and y, return a mask of ones if x' is less than or greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpeq(x, y);
return spu_nor(var, var);
}

View File

@@ -0,0 +1,52 @@
/* isnand2 - for each of two double slots, if input is any type of NaN return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);
xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );
cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return cmpnan;
}
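
A hedged usage sketch (hypothetical, not part of this commit): the mask composes with spu_sel, for instance to squash NaN slots to 0.0 before further arithmetic.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: replace any NaN slot of x with 0.0. */
static inline vector double
zero_nans (vector double x)
{
  vector unsigned long long nan_mask = isnand2(x);
  return spu_sel(x, spu_splats(0.0), nan_mask);
}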

View File

@@ -0,0 +1,40 @@
/* isnanf4 - for each element of vector x, return a mask of ones if x' is NaN, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnanf4 (vector float x)
{
(void)x;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,49 @@
/* isnormald2 - for each element of vector x, return a mask of ones if x' is normal (neither zero, subnormal, infinite, nor NaN), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isnormald2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Normal unless nan, infinite, denorm, or zero
//Check for 'not zero or all-ones exponent'
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
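
A hedged usage sketch (hypothetical, not part of this commit): gate a reciprocal so that zeros, denormals, INFs and NaNs fall back to a caller-supplied default; recipd2 is assumed to come from this same library.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: 1/x where x is normal, 'fallback' elsewhere. */
static inline vector double
guarded_recip (vector double x, vector double fallback)
{
  vector unsigned long long ok = isnormald2(x);
  return spu_sel(fallback, recipd2(x), ok);
}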

View File

@@ -0,0 +1,38 @@
/* isnormalf4 - for each element of vector x, return a mask of ones if x' is normal (neither zero, subnormal, infinite, nor NaN), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isnormalf4 (vector float x)
{
// NaN, INF not supported on SPU; normal unless zero
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
}

View File

@@ -0,0 +1,63 @@
/* isunorderedd2 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned long long
isunorderedd2 (vector double x, vector double y)
{
vec_double2 neg;
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
//Check if x is nan
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
//Check if y is nan
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return spu_or( cmpnanx, cmpnany );
}
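
A hedged sketch of the usual idiom (hypothetical, not part of this commit): mask out unordered lanes before acting on a comparison, as scalar code does with isunordered(); isgreaterd2 is assumed to come from this same library.

#include <simdmath.h>
#include <spu_intrinsics.h>
/* Hypothetical helper: per-element max(x, y) that returns y whenever
 * the pair is unordered (either input NaN). */
static inline vector double
max_or_y (vector double x, vector double y)
{
  vector unsigned long long unord = isunorderedd2(x, y);
  vector unsigned long long gt = isgreaterd2(x, y);
  /* take x only where x > y and the pair is ordered */
  return spu_sel(y, x, spu_andc(gt, unord));
}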

View File

@@ -0,0 +1,41 @@
/* isunorderedf4 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector unsigned int
isunorderedf4 (vector float x, vector float y)
{
(void)x;
(void)y;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}

View File

@@ -0,0 +1,263 @@
/* ldexpd2 - Multiply Double by 2 Raised to the Power ex
For large elements of ex (overflow), returns HUGE_VAL
For small elements of ex (underflow), returns 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
ldexpd2(vector double x, vector signed long long ex)
{
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2099);
// vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats( 2098);
// vec_int4 max = spu_splats( 2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
vec_double2 f1, f2;
vec_double2 out;
vec_double2 in = x;
vec_int4 exp_in;
// check input data range
vec_int4 exp0 = spu_shuffle( (vec_int4)ex, (vec_int4)ex, ((vec_uchar16){4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15}));
vec_int4 dmy = spu_shuffle( (vec_int4)spu_splats(0x10000), (vec_int4)ex, ((vec_uchar16){16,1,2,3, 16,1,2,3, 24,1,2,3,24,1,2,3}));
// (-)0xFFFFFFFF80000000 or (+)0x000000007FFFFFFF
vec_int4 msk_range = ((vec_int4){0,0x80000000, 0,0x80000000});
vec_int4 inrange = spu_addx( (vec_int4)ex, msk_range, spu_rlqwbyte(spu_genc((vec_int4)ex, msk_range), 4));
inrange = (vec_int4)spu_cmpeq( inrange, 0 );
inrange = spu_shuffle(inrange,inrange,((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// select dummy over ranged data or input data
vec_int4 exp = spu_sel( dmy, exp0, (vec_uint4)inrange);
exp_in = exp;
/* Clamp the specified exponent to the range -2044 to 2046.
*/
cmp_min = spu_cmpgt(exp, min);
cmp_max = spu_cmpgt(exp, max);
exp = spu_sel(min, exp, cmp_min);
exp = spu_sel(exp, max, cmp_max);
/* Generate the factors f1 = 2^e1 and f2 = 2^e2
*/
e1 = spu_rlmaska(exp, -1);
e2 = spu_sub(exp, e1);
f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift);
vec_double2 otmp = spu_mul(x, f1);
vec_uint4 fpscr1 = spu_mffpscr();
f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift);
out = spu_mul(otmp, f2);
vec_uint4 fpscr2 = spu_mffpscr();
/* Compute the product x * 2^e1 * 2^e2
*/
// out = spu_mul(spu_mul(x, f1), f2);
// check floating point register DENORM bit
vec_uint4 fpscr0, fpscr;
fpscr0 = spu_or(fpscr1, fpscr2);
fpscr = spu_shuffle(fpscr0, fpscr0, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,10,0x80,0x80,0x80,6,0x80,0x80,0x80,0x80,0x80}));
fpscr = spu_or(fpscr0, fpscr);
if ( __builtin_expect(spu_extract(fpscr, 1) == 0, 1) ) return out;
//////////////////////
// Denormalized calc//
//////////////////////
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_uint4 signmask = ((vec_uint4){0x80000000,0,0x80000000,0});
vec_int4 zeros = spu_splats(0);
vec_uchar16 msk_64_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
//check input was zero
vec_uint4 x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 x_zero = spu_cmpeq( x_body, (vec_uint4)zeros );
x_zero = spu_and( x_zero, spu_shuffle(x_zero,x_zero,msk_64_eq));
// check Denormalized input
vec_int4 cnt_zero = (vec_int4)spu_cntlz(x_body);
vec_uint4 is_den = (vec_uint4)spu_cmpgt(cnt_zero, 11); // Denormalized data 000XXXXX XXXXXXXX
is_den = spu_shuffle( is_den, is_den, splat_msb);
is_den = spu_sel(is_den, (vec_uint4)zeros, x_zero); // exclude zero from denormalized
// count 0bits for 64bit
vec_uint4 cnt_ex = (vec_uint4)spu_cmpgt(cnt_zero, 31); // Denormalized data 00000000 XXXXXXXX
vec_int4 cnt_z = spu_shuffle( cnt_zero, cnt_zero, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}));
cnt_zero = spu_add(cnt_zero, spu_sel(zeros, cnt_z, cnt_ex));
cnt_zero = spu_shuffle(cnt_zero, cnt_zero, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// extract each 64bit data
x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x000FFFFF,-1,0x000FFFFF,-1}));
vec_uint4 mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)exp_in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
// set max shift count
vec_int4 sht = spu_add( cnt_zero, ((vec_int4){-11,-64,-11,-64}));
// denorm & exp+ shift left
vec_uint4 cmp = spu_cmpgt( sht, exp_in);
vec_int4 sht_l = spu_sel(sht, exp_in, cmp);
int shtl0 = spu_extract(sht_l, 0);
int shtl1 = spu_extract(sht_l, 2);
vec_uint4 mant0l = spu_slqwbytebc( spu_slqw(mant0, shtl0), shtl0 );
vec_uint4 mant1l = spu_slqwbytebc( spu_slqw(mant1, shtl1), shtl1 );
vec_int4 expp = spu_shuffle(spu_sub(exp_in, sht_l), zeros, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
exp0 = spu_sel( expp, exp_in, sign ); // select plus or minus calculation
vec_uint4 mantl = spu_shuffle( mant0l, mant1l, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
vec_uint4 mant = spu_sel( mantl, (vec_uint4)x, sign);
exp = spu_sel( exp_in, exp0, is_den ); // select denormalized
x = (vec_double2)spu_sel( (vec_uint4)x, mant, is_den);
//////////////////////////////////////////////////////////////////////////
// from ldexpf4
vec_int4 expmask = ((vec_int4){0x7FF00000, 0, 0x7FF00000, 0});
e1 = spu_and((vec_int4)x, expmask);
e2 = spu_rlmask(e1,-20);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 2046);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-2044), exp);
minmask = spu_or (minmask, (vec_uchar16)x_zero);
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
minmask = spu_shuffle(minmask, minmask, splat_msb);
// check denorm
vec_uint4 mxmask = spu_and(spu_cmpgt(e2, 0), ((vec_uint4){0x00100000,0,0x00100000,0})); // not denorm
vec_int4 esum2 = spu_sub(esum, (vec_int4)spu_rlmask(mxmask, -20)); // reverse to norm
vec_uint4 mrange = spu_and(spu_cmpgt(zeros, esum2), spu_cmpgt(esum2, -55)); // denorm range
mrange = spu_shuffle(mrange, mrange, splat_msb);
vec_int4 sht_r = spu_sel(spu_splats(-54), esum2, spu_cmpgt(esum2, spu_splats(-54)) );
vec_int4 sht_rh = spu_add( sht_r, ((vec_int4){7,7,7,7}));
x_body = spu_or( x_body, mxmask );
mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) );
vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) );
#ifdef LDEXPD2_ROUND
// check current round mode
fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0}));
fpscr1 = spu_and(fpscr, ((vec_uint4){0,0x300,0,0}));
// prepare round data
vec_uint4 rnd0 = spu_slqwbytebc( spu_slqw( mant0r, 31), 31);
vec_uint4 rnd1 = spu_slqwbytebc( spu_slqw( mant1r, 31), 31);
vec_uint4 rnd0w = (vec_uint4)spu_cntb( (vec_uchar16)rnd0 );
vec_uint4 rnd1w = (vec_uint4)spu_cntb( (vec_uchar16)rnd1 );
rnd0w = spu_or( spu_slqwbyte(rnd0w,4), spu_slqwbyte(rnd0w,8));
rnd1w = spu_or( spu_slqwbyte(rnd1w,4), spu_slqwbyte(rnd1w,8));
rnd0 = spu_or( rnd0, rnd0w);
rnd1 = spu_or( rnd1, rnd1w);
// nearest
// check half
vec_uint4 hit0 = spu_cmpeq(rnd0, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 hit1 = spu_cmpeq(rnd1, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 add0 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit0);
vec_uint4 add1 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit1);
// check greater than half
rnd0 = spu_and( rnd0, ((vec_uint4){0,0x7FFFFFFF,0,0}));
rnd1 = spu_and( rnd1, ((vec_uint4){0,0x7FFFFFFF,0,0}));
hit0 = spu_cmpgt(rnd0, ((vec_uint4){0,0x40000000,0,0}));
hit1 = spu_cmpgt(rnd1, ((vec_uint4){0,0x40000000,0,0}));
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// select if fp0
add0 = spu_sel((vec_uint4)zeros, add0, spu_cmpeq(fpscr0, (vec_uint4)zeros));
add1 = spu_sel((vec_uint4)zeros, add1, spu_cmpeq(fpscr1, (vec_uint4)zeros));
// toward zero do nothing
// upward
sign = spu_rlmaska((vec_uint4)in, -31);
vec_uint4 sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,0,0,0,0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign1 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,8,8,8,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 hit0w = spu_cmpgt(rnd0, ((vec_uint4){0,0,0,0}));
vec_uint4 hit1w = spu_cmpgt(rnd1, ((vec_uint4){0,0,0,0}));
hit0 = spu_andc(hit0w, sign0);
hit1 = spu_andc(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0x800,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x200,0,0})));
// select if fp2
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// downward
hit0 = spu_and(hit0w, sign0);
hit1 = spu_and(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0xc00,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x300,0,0})));
// select if fp3
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// calc round
mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4));
mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4));
#endif // LDEXPD2_ROUND
vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
// select right answer
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_uchar16)expmask);
x = spu_sel(x, (vec_double2)zeros, minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), maxmask);
out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
// check Infinity,NaN
vec_uint4 is_inf = spu_cmpeq(e1, expmask);
is_inf = spu_and( is_inf, spu_shuffle(is_inf,is_inf,msk_64_eq));
out = (vec_double2)spu_sel((vec_uint4)out , (vec_uint4)in, is_inf);
out = spu_sel(out, in, (vec_ullong2)signmask);
return out;
}
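
A worked example (hypothetical, not part of this commit): for in-range exponents the function behaves like scalar ldexp applied per slot.

#include <simdmath.h>
#include <spu_intrinsics.h>
#include <stdio.h>
int main (void)
{
  vector double x = (vector double){3.0, 0.5};
  vector signed long long e = (vector signed long long){4LL, -1LL};
  vector double r = ldexpd2(x, e);
  /* expected: 3.0 * 2^4 = 48.0 and 0.5 * 2^-1 = 0.25 */
  printf("%f %f\n", spu_extract(r, 0), spu_extract(r, 1));
  return 0;
}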

View File

@@ -0,0 +1,56 @@
/* ldexpf4 - Multiply Float by 2 Raised to the Power exp
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
ldexpf4 (vector float x, vector signed int exp)
{
vec_int4 zeros = spu_splats(0);
vec_uchar16 expmask = (vec_uchar16)spu_splats((int)0x7F800000);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_rlmask(e1,-23);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 255);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros));
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, (vec_uchar16)spu_splats((int)0x7FFFFFFF));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
x = spu_sel(x, (vec_float4)zeros, minmask);
//x = spu_sel(x, (vec_float4)spu_splats((int)0xFFFFFFFF), maxmask);
x = spu_sel(x, (vec_float4)maxmask, maxmask);
return x;
}
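
A worked sketch (hypothetical, not part of this commit) of the common, unclamped path:

#include <simdmath.h>
/* Hypothetical check: each slot scales by 2^exp, so {1,2,3,4} with
 * exponents {1,2,3,-1} gives {2, 8, 24, 2}. */
static inline vector float
ldexpf4_example (void)
{
  return ldexpf4((vector float){1.0f, 2.0f, 3.0f, 4.0f},
                 (vector signed int){1, 2, 3, -1});
}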

View File

@@ -0,0 +1,45 @@
/* llabsi2 - returns absolute value of input.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
llabsi2 (vector signed long long in)
{
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
vec_uint4 add_1 = ((vec_uint4){0,1,0,1});
vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in);
res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4));
res = spu_sel( (vec_uint4)in, res, sign);
return ((vec_llong2)(res));
}
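
A worked sketch (hypothetical, not part of this commit): the borrow-propagating add above implements a full 64-bit negate, so both 32-bit halves of each slot are handled.

#include <simdmath.h>
/* Hypothetical check: {-5, 7} maps to {5, 7}. */
static inline vector signed long long
llabsi2_example (void)
{
  return llabsi2((vector signed long long){-5LL, 7LL});
}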

View File

@@ -0,0 +1,123 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LLDIV_H__
#define __LLDIV_H__
#include <spu_intrinsics.h>
static inline vector unsigned long long ll_spu_cntlz(vector unsigned long long x);
static inline vector unsigned long long ll_spu_sl(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count);
static inline vector unsigned long long ll_spu_cmpeq_zero(vector unsigned long long x);
static inline vector unsigned long long ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long ll_spu_sub(vector unsigned long long x, vector unsigned long long y);
static inline vector unsigned long long
ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
#endif // __LLDIV_H__
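
A hedged illustration (hypothetical, not part of this commit): ll_spu_sub chains spu_genb/spu_subx so the borrow crosses the 32-bit word boundary, giving a true 64-bit subtraction per slot.

#include "lldiv.h" /* assumed reachable on the include path */
/* Hypothetical check: {10 - 3, 1 - 2} = {7, 0xFFFFFFFFFFFFFFFF}. */
static inline vector unsigned long long
ll_sub_example (void)
{
  return ll_spu_sub((vector unsigned long long){10ULL, 1ULL},
                    (vector unsigned long long){3ULL, 2ULL});
}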

View File

@@ -0,0 +1,128 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
static inline vector signed long long _negatell2 (vector signed long long x);
static inline vector signed long long
_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivi2_t lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(_negatell2(denom), denom, (vec_ullong2)denomPos);
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denomAbs );
numerZeros = (vec_uint4)ll_spu_cntlz( numerAbs );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denomAbs, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denomAbs ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denomAbs, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numerAbs );
newNum = ll_spu_sub( numerAbs, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numerAbs = spu_sel( newNum, numerAbs, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = spu_sel((vec_llong2)quot, _negatell2((vec_llong2)quot), (vec_ullong2)quotNeg);
res.rem = spu_sel(_negatell2((vec_llong2)numerAbs), (vec_llong2)numerAbs, (vec_ullong2)numerPos);
return res;
}
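
A worked example (hypothetical, not part of this commit): quotients truncate toward zero and the remainder keeps the numerator's sign, matching C semantics.

#include <simdmath.h>
/* Hypothetical check: -7/2 = -3 rem -1, and 9/4 = 2 rem 1. */
static inline lldivi2_t
lldivi2_example (void)
{
  return lldivi2((vector signed long long){-7LL, 9LL},
                 (vector signed long long){2LL, 4LL});
}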

View File

@@ -0,0 +1,98 @@
/* lldivu2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include "lldiv.h"
// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
lldivu2_t lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
lldivu2_t res;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
denomZeros = (vec_uint4)ll_spu_cntlz( denom );
numerZeros = (vec_uint4)ll_spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), ll_spu_cmpeq_zero( denom ) );
// Shift left all leading zeros.
denomLeft = ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = ll_spu_cmpgt( denomShifted, numer );
newNum = ll_spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
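
A worked example (hypothetical, not part of this commit), including the documented divide-by-zero behaviour:

#include <simdmath.h>
/* Hypothetical check: 7/2 = 3 rem 1, and 9/0 gives quot 0, rem 9. */
static inline lldivu2_t
lldivu2_example (void)
{
  return lldivu2((vector unsigned long long){7ULL, 9ULL},
                 (vector unsigned long long){2ULL, 0ULL});
}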

View File

@@ -0,0 +1,110 @@
/* llrintd2 - rounds two doubles to the nearest 64-bit integers,
consistent with the current rounding mode.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
vector signed long long
llrintd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow;
vec_uint4 implied_one = ((vec_uint4){ 0, 0, 0x00100000, 0});
vec_uint4 exp_mask = ((vec_uint4){-1,-1, 0xFFF00000, 0});
vec_double2 bias;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
// check denormalized
vec_uint4 exp_in = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp_in, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Round the input according to the current rounding mode.
*/
vec_uint4 is_large = spu_cmpgt( exp_in, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));
// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
// (vec_ullong2)spu_splats(0x8000000000000000ULL));
mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)mant, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte(mant, -8), implied_one, exp_mask);
mant1 = spu_sel(mant, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)xx, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
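
A worked sketch (hypothetical, not part of this commit; assumes the SPU's default round-to-nearest-even mode for doubles):

#include <simdmath.h>
/* Hypothetical check under round-to-nearest-even:
 * {2.5, -1.5} rounds to the even integers {2, -2}. */
static inline vector signed long long
llrintd2_example (void)
{
  return llrintd2((vector double){2.5, -1.5});
}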

View File

@@ -0,0 +1,102 @@
/* llrintf4 - rounds four floats to the nearest 64-bit integers.
On SPU the rounding mode for floats is always towards 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
llroundf4_t
llrintf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
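
A worked sketch (hypothetical, not part of this commit): since single precision on the SPU always rounds toward zero, llrintf4 effectively truncates.

#include <simdmath.h>
/* Hypothetical check: {1.7f, -2.9f, 0.3f, 5.0f} truncates to
 * vll[0] = {1, -2} and vll[1] = {0, 5}. */
static inline llroundf4_t
llrintf4_example (void)
{
  return llrintf4((vector float){1.7f, -2.9f, 0.3f, 5.0f});
}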

View File

@@ -0,0 +1,92 @@
/* llroundd2 - rounds two doubles to the nearest 64-bit integers.
Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
vector signed long long
llroundd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow, addend;
vec_uint4 implied_one = { 0, 0, 0x00100000, 0};
vec_uint4 exp_mask = { -1, -1,0xFFF00000, 0};
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -8), implied_one, exp_mask);
mant1 = spu_sel((vec_uint4)in, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend = spu_rlmask(addend, -7);
// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
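
A worked sketch (hypothetical, not part of this commit): halfway cases round away from zero, unlike llrintd2's mode-dependent result.

#include <simdmath.h>
/* Hypothetical check: {2.5, -1.5} rounds to {3, -2}. */
static inline vector signed long long
llroundd2_example (void)
{
  return llroundd2((vector double){2.5, -1.5});
}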

View File

@@ -0,0 +1,115 @@
/* llroundf4 - rounds four floats to the nearest 64-bit integers.
Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions.
// Overflow returns unspecified data.
llroundf4_t
llroundf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 addend0, addend1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend0 = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend0 = spu_rlmask(addend0, -7);
addend1 = spu_rlmask(addend1, -7);
// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
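/* For reference, the per-slot semantics in plain scalar C (an illustrative
   sketch, an assumption of this note rather than part of the library):
   round half away from zero, then convert to a 64-bit integer. */
#include <math.h>
static long long llround_reference(float x)
{
    return (long long)(x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f));
}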

View File

@@ -0,0 +1,79 @@
/* log10f4 - for each of four float slots, compute the base-10 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG10F_H_loga2msb ((float)0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float)7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float)0.4342944622040f)
#define _LOG10F_H_logaelsb ((float)1.9699272335e-8f)
#define _LOG10F_H_logae ((float)0.4342944819033f)
#define _LOG10F_H_c0 ((float)(0.2988439998f))
#define _LOG10F_H_c1 ((float)(0.3997655209f))
#define _LOG10F_H_c2 ((float)(0.6666679125f))
vector float
log10f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG10F_H_c0), w, spu_splats(_LOG10F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG10F_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOG10F_H_logaemsb), x1,
spu_mul(spu_splats(_LOG10F_H_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(_LOG10F_H_logaelsb), x1,
spu_madd(spu_splats(_LOG10F_H_loga2lsb), wnew,
spu_mul(spu_splats(_LOG10F_H_logae), yneg))
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
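/* The identity behind the constants above, as a scalar sketch (an
   illustrative assumption, not library code): with x = m * 2^e and
   m in [sqrt(0.5), sqrt(2)),
       log10(x) = e*log10(2) + log10(e)*ln(m),
   and each constant is split into a high part (msb) plus a low correction
   (lsb) so the single-precision products lose fewer bits. */
#include <math.h>
static float log10_reference(float x)
{
    int e;
    float m = frexpf(x, &e);                      /* x = m * 2^e, m in [0.5, 1) */
    if (m < 0.70710678f) { m *= 2.0f; e -= 1; }   /* renormalize as the SPU code does */
    return (float)e * 0.30102999566f + 0.43429448190f * logf(m);
}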

View File

@@ -0,0 +1,51 @@
/* log1pf4 - for each of four float slots, compute the natural logarithm of 1 + x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
log1pf4 (vector float x)
{
vec_uchar16 nearzeromask = (vec_uchar16)spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats((float)1.5934420741f), spu_splats((float)0.8952856868f));
d1 = spu_madd(x , spu_splats((float)0.1198195734f), spu_splats((float)0.8377145063f));
d1 = spu_madd(x2, d1, d0);
n0 = spu_madd(x , spu_splats((float)1.1457993413f), spu_splats((float)0.8952856678f));
n1 = spu_madd(x , spu_splats((float)0.0082862580f), spu_splats((float)0.3394238808f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, divf4(n1, d1)),
nearzeromask);
}
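/* Why the near-zero path exists, shown with scalar floats (an illustrative
   assumption, not library code): for |x| well below 2^-24, 1.0f + x rounds
   to exactly 1.0f, so logf(1.0f + x) loses all information, while the
   rational approximation keeps it. */
#include <math.h>
#include <stdio.h>
int main(void)
{
    float x = 1e-10f;
    printf("logf(1+x) = %g\n", logf(1.0f + x));   /* prints 0 */
    printf("log1pf(x) = %g\n", log1pf(x));        /* prints ~1e-10 */
    return 0;
}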

View File

@@ -0,0 +1,71 @@
/* log2f4 - for each of four float slots, compute the base-2 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOG2F_H_l2emsb ((float)1.4426950216293f)
#define _LOG2F_H_l2elsb ((float)1.9259629911e-8f)
#define _LOG2F_H_l2e ((float)1.4426950408890f)
#define _LOG2F_H_c0 ((float)(0.2988439998f))
#define _LOG2F_H_c1 ((float)(0.3997655209f))
#define _LOG2F_H_c2 ((float)(0.6666679125f))
vector float
log2f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4(x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOG2F_H_c0), w, spu_splats(_LOG2F_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOG2F_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 zz1 = spu_madd(spu_splats(_LOG2F_H_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(_LOG2F_H_l2elsb), x1,
spu_mul(spu_splats(_LOG2F_H_l2e), yneg)
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}

View File

@@ -0,0 +1,93 @@
/* logbd2 - for each element of vector x, return the exponent of x (as if normalized) as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALL
#define HUGE_VALL __builtin_huge_vall ()
#endif
#ifndef DBL_INF
#define DBL_INF ((long long)0x7FF0000000000000ull)
#endif
#ifndef DBL_NAN
#define DBL_NAN ((long long)0x7FF8000000000000ull)
#endif
vector double
logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)DBL_INF), isinf );
//HUGE_VAL: x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats((long long)-HUGE_VALL), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
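/* Scalar reference for the special cases handled above (an illustrative
   assumption, not library code): */
#include <math.h>
static double logb_reference(double x)
{
    if (isnan(x)) return x;             /* NaN -> NaN (DBL_NAN above) */
    if (isinf(x)) return INFINITY;      /* +-Inf -> +Inf (DBL_INF above) */
    if (x == 0.0) return -INFINITY;     /* +-0 -> -HUGE_VAL */
    return (double)ilogb(x);            /* unbiased exponent; subnormals normalized */
}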

View File

@@ -0,0 +1,46 @@
/* logbf4 - for each element of vector x, return the exponent of x as a floating-point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#ifndef HUGE_VALF
#define HUGE_VALF __builtin_huge_valf ()
#endif
vector float
logbf4 (vector float x)
{
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(e1, 0);
e1 = spu_sub(e1, spu_splats((int)0x3F800000));
return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}
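/* Usage sketch (assuming an SPU toolchain; illustrative, not library code):
   the result is floor(log2(|x|)) for normal inputs. */
static vector float logbf4_example(void)
{
    vector float v = (vector float){1.0f, 8.0f, 0.25f, 3.0f};
    return logbf4(v);   /* -> {0.0f, 3.0f, -2.0f, 1.0f} */
}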

View File

@@ -0,0 +1,70 @@
/* logf4 - for each of four slots, calculate the natural log
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
#define _LOGF_H_ln2msb ((float)(0.6931470632553f))
#define _LOGF_H_ln2lsb ((float)(1.1730463525e-7f))
#define _LOGF_H_c0 ((float)(0.2988439998f))
#define _LOGF_H_c1 ((float)(0.3997655209f))
#define _LOGF_H_c2 ((float)(0.6666679125f))
vector float
logf4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uchar16)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats((float)0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(_LOGF_H_c0), w, spu_splats(_LOGF_H_c1));
polyw = spu_madd(polyw , w, spu_splats(_LOGF_H_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(_LOGF_H_ln2msb), wnew, x1);
vec_float4 zz2 = spu_madd(spu_splats(_LOGF_H_ln2lsb), wnew, yneg);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
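/* The core identity, as a scalar sketch (an illustrative assumption, not
   library code): with x = m * 2^e and z = (m - 1)/(m + 1),
       ln(m) = 2*atanh(z) = 2*(z + z^3/3 + z^5/5 + ...),
   so ln(x) = e*ln(2) + 2*atanh(z). The polynomial in w = z*z above
   approximates this series (c2 ~ 2/3, c1 ~ 2/5), and ln(2) is split into
   msb/lsb parts to recover bits lost in the single-precision product. */
#include <math.h>
static float log_reference(float x)
{
    int e;
    float m = frexpf(x, &e);
    if (m < 0.70710678f) { m *= 2.0f; e -= 1; }
    float z = (m - 1.0f) / (m + 1.0f);
    return (float)e * 0.69314718f + 2.0f * atanhf(z);
}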

View File

@@ -0,0 +1,54 @@
/* modfd2 - for each of two double slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Returns fractional part and stores integral part in *iptr.
vector double
modfd2 (vector double x, vector double *iptr)
{
vec_double2 integral, fraction;
vec_uint4 iszero;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
integral = truncd2( x );
// if integral is zero, then fraction is x.
iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0);
iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern));
fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero);
*iptr = integral;
return fraction;
}
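/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector double modfd2_example(void)
{
    vector double ip;
    vector double fr = modfd2((vector double){2.75, -1.5}, &ip);
    /* ip -> {2.0, -1.0}, fr -> {0.75, -0.5}; both parts carry the input's sign. */
    return fr;
}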

View File

@@ -0,0 +1,47 @@
/* modff4 - for each of four float slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Returns fractional part and stores integral part in *iptr.
vector float
modff4 (vector float x, vector float *iptr)
{
vec_float4 integral, fraction;
integral = truncf4( x );
fraction = spu_sub( x, integral );
*iptr = integral;
return fraction;
}

View File

@@ -0,0 +1,71 @@
/* nearbyintd2 - Round the input to the nearest integer according to
the current rounding mode without raising an inexact exception.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nearbyintd2(vector double in)
{
vec_uint4 fpscr;
vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL});
vec_double2 out, addend;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
fpscr = spu_mffpscr();
// check denormalized
vec_uint4 exp = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Add 2^52 and then subtract 2^52 to effect a round performed by the
 * hardware. Also preserve the input sign so that negative inputs that
 * round to zero generate a -0.0.
 */
vec_uint4 is_large = spu_cmpgt( exp, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
addend = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
addend = spu_sel(addend, xx, sign);
out = spu_sel(spu_sub(spu_add(xx, addend), addend), xx, sign);
spu_mtfpscr(fpscr);
return (out);
}
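/* The magic-number trick above, in scalar form (an illustrative assumption,
   not library code): adding 2^52 pushes the fraction bits off the end of a
   double's mantissa, so the add itself performs the round; subtracting 2^52
   recovers the rounded value. */
static double round_via_bias(double x)        /* valid for 0 <= x < 2^52 */
{
    const double bias = 4503599627370496.0;   /* 2^52 */
    volatile double t = x + bias;             /* volatile: keep the rounding add */
    return t - bias;
}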

View File

@@ -0,0 +1,50 @@
/* nearbyintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode,
without raising an inexact floating-point exception.
On SPU, the rounding mode for float is always towards zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float nearbyintf4(vector float x)
{
vector signed int xi;
vector unsigned int inrange;
// Can convert to and from signed integer to truncate values in range [-2^31, 2^31).
// However, no truncation needed if exponent > 22.
inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
return spu_sel( x, spu_convtf( xi, 0 ), inrange );
}
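/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector float nearbyintf4_example(void)
{
    /* -> {1.0f, -1.0f, 2.0f, 1e10f}: rounding is toward zero, and inputs
       with exponent > 22 are already integral, so they pass through. */
    return nearbyintf4((vector float){1.7f, -1.7f, 2.5f, 1e10f});
}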

View File

@@ -0,0 +1,38 @@
/* negated2 - for each of two double slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
negated2 (vector double x)
{
return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}

View File

@@ -0,0 +1,38 @@
/* negatef4 - for each of four float slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float negatef4 (vector float x)
{
return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) );
}

View File

@@ -0,0 +1,39 @@
/* negatei4 - for each of 4 signed int slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed int
negatei4 (vector signed int x)
{
vector signed int zero = (vector signed int){0,0,0,0};
return spu_sub (zero, x);
}

View File

@@ -0,0 +1,43 @@
/* negatell2 - for each of 2 signed long long slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector signed long long
negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
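/* Scalar model of a 64-bit negate built from 32-bit halves (an illustrative
   assumption, not library code): the SPU has no 64-bit integer subtract, so
   a borrow is generated in the low word and propagated into the high word,
   which spu_genb/spu_subx do above. */
#include <stdint.h>
static int64_t negate_ll_reference(int64_t x)
{
    uint32_t lo = (uint32_t)x, hi = (uint32_t)((uint64_t)x >> 32);
    uint32_t nlo = 0u - lo;
    uint32_t borrow = (lo != 0);          /* borrow out of the low word */
    uint32_t nhi = 0u - hi - borrow;
    return (int64_t)(((uint64_t)nhi << 32) | nlo);
}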

View File

@@ -0,0 +1,92 @@
/* nextafterd2 - find the next representable floating-point value after x in the direction of y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector double
nextafterd2 (vector double xx, vector double yy)
{
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
vec_uint4 is_sub, is_zerox, is_zeroy;
vec_uint4 is_equal, is_infy, is_nany;
vec_uint4 res0, res1, res;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
// mask sign bit
abs_x = spu_and( (vec_uint4)xx, msk_abs);
abs_y = spu_and( (vec_uint4)yy, msk_abs);
is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
// -0 exception
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
// if same sign |y| < |x| -> decrease
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
is_sub = spu_or(is_sub, abs_dif);
is_sub = spu_rlmaska(is_sub, -31);
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
res = spu_sel( res0, res1, is_sub); // select increase or decrease
res = spu_or( res, sign_x); // set sign
// check exception
// 0 -> -1
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
// check equal (include 0,-0)
is_zeroy = spu_cmpeq( abs_y, vec_zero);
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
res = spu_sel(res, (vec_uint4)yy, is_equal);
// check nan
is_infy = spu_cmpeq( abs_y, msk_exp);
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
is_nany = spu_and( abs_y, msk_exp);
is_nany = spu_cmpeq( is_nany, msk_exp);
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
is_nany = spu_sel( is_nany, vec_zero, is_infy);
res = spu_sel(res, (vec_uint4)yy, is_nany);
return (vec_double2)res;
}
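/* The core idea, in scalar form ignoring NaN and infinities (an illustrative
   assumption, not library code): for finite doubles, stepping the raw bit
   pattern by one moves to the adjacent representable value; the direction
   depends on the signs of x and of y - x. */
#include <stdint.h>
#include <string.h>
static double nextafter_core(double x, double y)
{
    if (x == y) return y;
    uint64_t bits; memcpy(&bits, &x, sizeof bits);
    if (x == 0.0) { bits = 1; if (y < 0.0) bits |= 1ull << 63; }
    else if ((x < y) == (x > 0.0)) bits += 1;  /* step away from zero */
    else bits -= 1;                            /* step toward zero */
    double r; memcpy(&r, &bits, sizeof r);
    return r;
}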

View File

@@ -0,0 +1,66 @@
/* nextafterf4 - for each of four float slots,
   return the next representable value after x in the direction of y;
   if x is equal to y, the result is y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float nextafterf4(vector float x, vector float y)
{
vec_float4 x_not_dec, lala_inc, lala_dec;
vec_uint4 abs_inc_number, abs_dec_number;
vec_uint4 A, B;
//abs_inc, abs_dec
abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats((float)0x0)));
abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats((float)0x0));
//x<= y
A= spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in < 0
B= abs_dec_number;
lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats((float)0x0), x));
// in <=0, abs_inc ( if in==0, set result's sign to -)
//A= spu_or(spu_splats((unsigned int)0x80000000), spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000)));
A= spu_or(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in > 0
B = abs_dec_number;
lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats((float)0x0)));
x_not_dec = spu_sel(y, lala_inc, spu_cmpgt(y, x));
// (x <= y) || (x > y)
return spu_sel(x_not_dec, lala_dec, spu_cmpgt(x, y));
}

View File

@@ -0,0 +1,72 @@
/* powf4 - for each of four float slots, compute x raised to the power y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float
powf4 (vector float x, vector float y)
{
vec_int4 zeros = spu_splats((int)0);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
vec_float4 res = exp2f4(spu_mul(y, log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), negmask);
return spu_sel(res, (vec_float4)zeros, zeromask);
}
/*
{
vec_int4 zeros = spu_splats(0);
vec_int4 ones = (vec_int4)spu_splats((char)0xFF);
vec_uchar16 zeromask = (vec_uchar16)spu_cmpeq((vec_float4)zeros, x);
vec_uchar16 onemask = (vec_uchar16)spu_cmpeq((vec_float4)ones , y);
vec_uchar16 negmask = (vec_uchar16)spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), (vec_uint4)spu_splats(0x00000001));
negmask = spu_and(negmask, (vec_uchar16)spu_cmpgt(oddy, (vec_uint4)zeros));
}
*/
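/* Scalar model of the identity used above (an illustrative assumption, not
   library code): pow(x, y) = exp2(y * log2(|x|)), negated when x < 0 and
   trunc(|y|) is odd; other negative-x cases get no special handling. */
#include <math.h>
static float pow_reference(float x, float y)
{
    float r = exp2f(y * log2f(fabsf(x)));
    if (x < 0.0f && (((long long)fabsf(y)) & 1))   /* odd truncated exponent */
        r = -r;
    return r;
}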

View File

@@ -0,0 +1,80 @@
/* recipd2 - for each of two double slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
// Handles exceptional values as follows:
// NaN -> NaN
// (+,-)Inf -> (+,-)0
// (+,-)0 -> (+,-)Inf
// Denormal inputs are treated as zero.
vector double
recipd2 (vector double x)
{
vec_ullong2 expmask, signmask;
vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result;
vec_float4 onef, manf, y0f, y1f;
expmask = spu_splats(0x7ff0000000000000ull);
signmask = spu_splats(0x8000000000000000ull);
onef = spu_splats(1.0f);
one = spu_extend( onef );
// Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ).
// Invert exponent part with subtraction.
exp = spu_and( x, (vec_double2)expmask );
nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp );
man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask );
// Compute mantissa part with single and double precision Newton-Raphson steps.
// Then multiply with 2^(1-exponent).
manf = spu_roundtf( man );
y0f = spu_re( manf );
y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f );
y1 = spu_extend( y1f );
y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 );
y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 );
y3 = spu_mul( y3, nexp );
// Choose iterated result or special value.
zero = spu_and( x, (vec_double2)signmask );
inf = spu_sel( (vec_double2)expmask, x, signmask );
result = spu_sel( y3, zero, isinfd2 ( x ) );
result = spu_sel( result, inf, is0denormd2 ( x ) );
result = spu_sel( result, x, isnand2( x ) );
return result;
}
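/* One Newton-Raphson step in scalar form (an illustrative assumption, not
   library code): each step roughly doubles the number of correct bits, so
   the ~12-bit spu_re estimate plus one single-precision and two
   double-precision steps reach full double accuracy. */
static double nr_recip_step(double x, double y)
{
    return y + (1.0 - x * y) * y;    /* y' = y*(2 - x*y) */
}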

View File

@@ -0,0 +1,45 @@
/* recipf4 - for each of four float slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
vector float recipf4 (vector float x)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact
// answers for powers of 2, and a slightly smaller relative error bound.
vec_float4 y0;
vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
y0 = spu_re( x );
return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 );
}
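/* Usage sketch (assuming an SPU toolchain; illustrative, not library code): */
static vector float recipf4_example(void)
{
    /* -> {0.5f, 0.25f, 2.0f, ~0.33333334f}; exact for the powers of two
       thanks to the 1.0 + 1 ulp constant. */
    return recipf4((vector float){2.0f, 4.0f, 0.5f, 3.0f});
}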

View File

@@ -0,0 +1,313 @@
/* A vector double is returned that contains the remainder xi REM yi,
for the corresponding elements of vector double x and vector double y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remainderd2(vector double x, vector double yy)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x2
abs_2y = _twice(abs_yy); // 2 x y
/*
* use fmodd2 function
*/
// get remainder of y x2
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_2y);
{
vec_double2 y = (vec_double2)abs_2y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder == 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // result went negative
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2*x (clear sign first; _twice does not handle negative inputs)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
return ((vec_double2)result);
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which input is bigger, aa or bb?
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extends spu_cmpgt to 64-bit data
*/
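/*
 * Illustrative note (not in the original source): per 64-bit lane this
 * computes gt64(a,b) = (a.hi > b.hi) || (a.hi == b.hi && a.lo > b.lo).
 * spu_cmpgt compares 32-bit words, so the low-word result is rotated up
 * by four bytes with spu_rlqwbyte and gated by the high-word equality mask.
 */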
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid in the high word slot only
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // valid in high word only
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// denorm or zero: shift left by 1; normal: add 1 to exponent
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
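
A minimal usage sketch for remainderd2 (a standalone example, not part of the library source), assuming an SPU toolchain such as spu-gcc with hosted printf support:

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    /* remainderd2 processes two independent double slots at once. */
    vector double x = (vector double){ 5.3, -7.25 };
    vector double y = (vector double){ 2.0,  3.0 };
    vector double r = remainderd2(x, y);
    /* C99 remainder() semantics per slot:
     * remainder( 5.3,  2.0) = 5.3 - 3*2.0   = -0.7   (5.3/2.0 = 2.65 rounds to 3)
     * remainder(-7.25, 3.0) = -7.25 + 2*3.0 = -1.25
     */
    printf("%f %f\n", spu_extract(r, 0), spu_extract(r, 1));
    return 0;
}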

View File

@@ -0,0 +1,107 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
remainderf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, i, rem;
vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;
// Find i = truncated_integer(|x/y|)
// By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
// Thus the exact truncation is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
odd0 = odd2 = spu_nor( odd1, odd1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );
i = i0;
i = spu_sel( i1, i, cmp1 );
i = spu_sel( i2, i, cmp2 );
odd = odd0;
odd = spu_sel( odd1, odd, cmp1 );
odd = spu_sel( odd2, odd, cmp2 );
rem = spu_nmsub( i, yabs, xabs );
// Test whether i or i+1 = nearest_integer(|x/y|)
//
// i+1 is correct if:
//
// rem > 0.5*|y|
// or
// rem = 0.5*|y| and i is odd
yabshalf = spu_mul( yabs, spu_splats(0.5f) );
cmp1 = spu_cmpgt( rem, yabshalf );
cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );
i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
i = copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
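
A matching sketch for the four-slot float variant (a standalone example, not part of the library source; inputs chosen to stay well within the |x/y| < 2^20 accuracy bound noted above):

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    vector float x = (vector float){ 5.3f, 7.0f, -3.5f, 9.0f };
    vector float y = (vector float){ 2.0f, 3.0f,  2.0f, 4.0f };
    vector float r = remainderf4(x, y);
    /* Expected per slot (C99 remainderf): -0.7, 1.0, 0.5, 1.0 */
    printf("%f %f %f %f\n", spu_extract(r, 0), spu_extract(r, 1),
           spu_extract(r, 2), spu_extract(r, 3));
    return 0;
}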

View File

@@ -0,0 +1,356 @@
/* remquod2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* This function returns the same vector double result as remainderd2().
* In addition, a vector signed long long is stored in *quo; each
* element's sign is the sign of xi / yi and its magnitude is
* congruent modulo 2^n to the magnitude of the integral quotient
* of xi / yi, where n is an implementation-defined integer
* greater than or equal to 3.
*/
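/*
 * Illustrative example (not in the original source):
 *   remquod2((vector double){9.0, 7.0}, (vector double){4.0, -3.0}, &q)
 * yields remainders {1.0, 1.0} and q = {2, -2}: the low bits of the
 * rounded quotient, carrying the sign of x/y, as with C99 remquo().
 */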
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb);
static inline vec_uint4 _twice(vec_uint4 aa);
vector double
remquod2(vector double x, vector double yy, vector signed long long *quo)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 quotient, quotient0;
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_8y, abs_4y, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, not_ge, quo_pos, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// clear sign bits
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
quo_pos = spu_cmpgt((vec_int4)spu_and((vec_uint4)spu_xor(x, yy), sign_mask), -1);
quo_pos = spu_shuffle(quo_pos, quo_pos, splat_hi);
// check for NaN outputs
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = _vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x8
abs_2y = _twice(abs_yy); // 2 x y
abs_4y = _twice(abs_2y); // 4 x y
abs_8y = _twice(abs_4y); // 8 x y
/*
* use fmodd2 function
*/
// get remainder of y x8
// result = (vec_uint4)_fmodd2( x, (vec_double2)abs_8y);
{
vec_double2 y = (vec_double2)abs_8y;
int shiftx0, shiftx1, shifty0, shifty1;
vec_uchar16 swap_words = ((vec_uchar16){ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 propagate = ((vec_uchar16){ 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192});
// vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_int4 n, shift;
vec_uint4 exp_x, exp_y;
// , sign;
// vec_uint4 abs_x, abs_y;
vec_uint4 abs_y;
vec_uint4 mant_x, mant_x0, mant_x1;
vec_uint4 mant_y, mant_y0, mant_y1;
vec_uint4 mant_0, mant_1;
vec_uint4 mant_r, mant_l;
// vec_uint4 result;
vec_uint4 result0, resultx;
vec_uint4 zero_x, zero_y;
vec_uint4 denorm_x, denorm_y;
vec_uint4 cnt, cnt_x, cnt_y;
vec_uint4 shift_x, shift_y;
vec_uint4 adj_x, adj_y;
vec_uint4 z, borrow, mask;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
// vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
// vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 merge_sel = ((vec_uint4){0,0,-1,-1});
// vec_uint4 vec_zero = spu_splats((unsigned int)0);
// sign = spu_and( (vec_uint4)x, sign_mask);
// abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
exp_x = spu_rlmask(abs_x, -20);
exp_y = spu_rlmask(abs_y, -20);
// get shift count for denorm
cnt_x = spu_cntlz(abs_x);
cnt_y = spu_cntlz(abs_y);
cnt_x = spu_add(cnt_x, spu_sel( vec_zero, spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_y = spu_add(cnt_y, spu_sel( vec_zero, spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
zero_x = spu_cmpgt(cnt_x, 63); // zero ?
zero_y = spu_cmpgt(cnt_y, 63); // zero ?
result0 = spu_or(zero_x, zero_y);
result0 = spu_shuffle(result0, result0, splat_hi);
// 0 - (cnt_x - 11) = 11 - cnt_x
shift_x= spu_add(cnt_x, -11);
shift_y= spu_add(cnt_y, -11);
cnt_x = spu_sub(11, cnt_x);
cnt_y = spu_sub(11, cnt_y);
// count to normalize
adj_x = spu_sel(spu_add(exp_x, -1), cnt_x, spu_cmpeq(exp_x, 0));
adj_y = spu_sel(spu_add(exp_y, -1), cnt_y, spu_cmpeq(exp_y, 0));
adj_x = spu_shuffle(adj_x, adj_x, splat_hi);
adj_y = spu_shuffle(adj_y, adj_y, splat_hi);
// for denorm
shiftx0 = spu_extract(shift_x, 0);
shiftx1 = spu_extract(shift_x, 2);
shifty0 = spu_extract(shift_y, 0);
shifty1 = spu_extract(shift_y, 2);
mant_x0 = spu_slqwbytebc( spu_slqw(spu_and(abs_x,((vec_uint4){-1,-1,0,0})),shiftx0), shiftx0);
mant_y0 = spu_slqwbytebc( spu_slqw(spu_and(abs_y,((vec_uint4){-1,-1,0,0})),shifty0), shifty0);
mant_x1 = spu_slqwbytebc( spu_slqw(abs_x,shiftx1), shiftx1);
mant_y1 = spu_slqwbytebc( spu_slqw(abs_y,shifty1), shifty1);
mant_x = spu_sel(mant_x0, mant_x1, merge_sel);
mant_y = spu_sel(mant_y0, mant_y1, merge_sel);
denorm_x = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_x);
denorm_y = spu_cmpgt((vec_int4)vec_zero, (vec_int4)adj_y);
mant_x = spu_sel(spu_and(abs_x, mant_mask), mant_x, denorm_x);
mant_y = spu_sel(spu_and(abs_y, mant_mask), mant_y, denorm_y);
mant_x = spu_or(mant_x, implied_1); // hidden bit
mant_y = spu_or(mant_y, implied_1); // hidden bit
// x < y ?
resultx = _vec_gt64(abs_y, abs_x);
n = spu_sub((vec_int4)adj_x, (vec_int4)adj_y);
mask = spu_cmpgt(n, 0);
mask = spu_andc(mask, resultx);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
// bring back to original range
mant_0 = spu_and(mant_x, ((vec_uint4){0x001FFFFF,-1,0,0}));
mant_1 = spu_and(mant_x, ((vec_uint4){0,0,0x001FFFFF,-1}));
// for adj_y < 0 exp max=1
shiftx0 = spu_extract(adj_y, 0);
shiftx1 = spu_extract(adj_y, 2);
mant_x0 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_0, shiftx0), 7 + shiftx0);
mant_x1 = spu_rlmaskqwbytebc(spu_rlmaskqw(mant_1, shiftx1), 7 + shiftx1);
mant_r = spu_sel(mant_x0, mant_x1, merge_sel);
// for adj_y >= 0
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_sel( vec_zero, spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(cnt, -11);
cnt = spu_sel(vec_zero, cnt, spu_cmpgt(cnt, 0)); // for exp >= 1
shift = (vec_int4)spu_sel(cnt, adj_y, spu_cmpgt(cnt, adj_y));
shiftx0 = spu_extract(shift, 0);
shiftx1 = spu_extract(shift, 2);
mant_x0 = spu_slqwbytebc(spu_slqw(mant_0, shiftx0), shiftx0);
mant_x1 = spu_slqwbytebc(spu_slqw(mant_1, shiftx1), shiftx1);
mant_l = spu_sel(mant_x0, mant_x1, merge_sel);
cnt = spu_sub(adj_y, (vec_uint4)shift);
mant_l = spu_add(mant_l, spu_and(spu_rl(cnt,20), exp_mask));
result = spu_sel(mant_l, mant_r, denorm_y);
result = spu_sel(result, vec_zero, result0); // remainder == 0
result = spu_sel(result, abs_x, resultx); // x < y
// result = spu_xor(result, sign); // set sign
// return ((vec_double2)result);
}
// 8*y overflows when exp(y) >= 7FC (the x8 adds 3 to the exponent), so keep x
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
/* if (x >= 4*y)
* x -= 4*y
* quotient = 4
* else
* quotient = 0
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FCFFFFF));
not_ge = _vec_gt64(abs_4y, abs_x);
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_4y), abs_x, not_ge);
quotient = spu_andc(spu_splats((int)4), (vec_int4)not_ge);
/* if (x >= 2*y)
* x -= 2*y
* quotient += 2
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FDFFFFF));
not_ge = _vec_gt64(abs_2y, abs_x); // abs_2y > abs_x
not_ge = spu_or(not_ge, overflow);
abs_x = spu_sel(_sub_d_(abs_x, abs_2y), abs_x, not_ge);
quotient = spu_sel(spu_add(quotient, 2), quotient, not_ge);
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = _twice(abs_x); // 2 x x
bias = _vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // result went negative
// make x2
abs_2x = _twice(spu_andc(abs_x, sign_mask)); // 2*x (clear sign first; _twice does not handle negative inputs)
bias = spu_andc(bias, spu_rlmaska(_sub_d_(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, _sub_d_(abs_x, abs_yy), bias);
quotient = spu_sub(quotient, (vec_int4)bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
quotient = spu_and(quotient, ((vec_int4){0,7,0,7})); // limit to 3 bits
quotient0 = spu_subx( (vec_int4)vec_zero, quotient, spu_rlqwbyte(spu_genb((vec_int4)vec_zero,quotient),4));
quotient = spu_sel(quotient0, quotient, quo_pos);
*quo = (vec_llong2)quotient;
return ((vec_double2)result);
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4 _sub_d_(vec_uint4 aa, vec_uint4 bb)
{
// which input is bigger, aa or bb?
vec_uint4 is_bigb = _vec_gt64(bb, aa); // bb > aa
// need denorm calc ?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
/*
* extends spu_cmpgt to 64-bit data
*/
static inline vec_uint4 _vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid in the high word slot only
}
static inline vec_uint4 _vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = _vec_gt64_half(aa, bb); // valid in high word only
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
/*
* double-format x2 (multiply by two)
*/
static inline vec_uint4 _twice(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// denorm or zero: shift left by 1; normal: add 1 to exponent
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
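
A minimal usage sketch for remquod2 (a standalone example, not part of the library source; assumes an SPU toolchain with hosted printf):

#include <stdio.h>
#include <simdmath.h>
#include <spu_intrinsics.h>

int main(void)
{
    vector double x = (vector double){ 7.0, -5.0 };
    vector double y = (vector double){ 2.0,  3.0 };
    vector signed long long q;
    vector double r = remquod2(x, y, &q);
    /* remainder(7, 2) = -1 with quotient 4 (7/2 = 3.5 rounds to even 4);
     * remainder(-5, 3) = 1 with quotient -2 (sign of x/y is negative). */
    printf("r = {%f, %f}  q = {%lld, %lld}\n",
           spu_extract(r, 0), spu_extract(r, 1),
           spu_extract(q, 0), spu_extract(q, 1));
    return 0;
}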

Some files were not shown because too many files have changed in this diff.