added updated version of simdmathlibrary-1.0.1

This commit is contained in:
ejcoumans
2007-07-27 18:53:58 +00:00
parent fddd6c5721
commit f360dd27d6
377 changed files with 9928 additions and 6136 deletions

View File

@@ -0,0 +1,116 @@
/* Common functions for lldivi2/lldivu2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIV_H___
#define ___SIMD_MATH_LLDIV_H___
#include <spu_intrinsics.h>
static inline vector unsigned long long
__ll_spu_cntlz(vector unsigned long long x)
{
vec_uint4 cnt;
cnt = spu_cntlz((vec_uint4)x);
cnt = spu_add(cnt, spu_and(spu_cmpeq(cnt, 32), spu_rlqwbyte(cnt, 4)));
cnt = spu_shuffle(cnt, cnt, ((vec_uchar16){0x80,0x80,0x80,0x80, 0,1,2,3, 0x80,0x80,0x80,0x80, 8,9,10,11}));
return (vec_ullong2)cnt;
}
static inline vector unsigned long long
__ll_spu_sl(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
// shift upper word
x_upper = spu_and(x, mask);
x_upper = spu_slqwbytebc(x_upper, spu_extract((vec_uint4)count, 1));
x_upper = spu_slqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_slqwbytebc(x, spu_extract((vec_uint4)count, 3));
x_lower = spu_slqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_rlmask(vector unsigned long long x, vector unsigned long long count)
{
vec_ullong2 mask = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 x_upper, x_lower;
vec_uint4 cnt_byte;
cnt_byte = spu_add((vec_uint4)count, 7);
// shift upper word
x_upper = spu_rlmaskqwbytebc(x, spu_extract(cnt_byte, 1));
x_upper = spu_rlmaskqw(x_upper, spu_extract((vec_uint4)count, 1));
// shift lower word
x_lower = spu_andc(x, mask);
x_lower = spu_rlmaskqwbytebc(x_lower, spu_extract(cnt_byte, 3));
x_lower = spu_rlmaskqw(x_lower, spu_extract((vec_uint4)count, 3));
return spu_sel(x_lower, x_upper, mask);
}
static inline vector unsigned long long
__ll_spu_cmpeq_zero(vector unsigned long long x)
{
vec_uint4 cmp;
cmp = spu_cmpeq((vec_uint4)x, 0);
return (vec_ullong2)spu_and(cmp, spu_shuffle(cmp, cmp, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
static inline vector unsigned long long
__ll_spu_cmpgt(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 gt;
gt = spu_cmpgt((vec_uint4)x, (vec_uint4)y);
gt = spu_sel(gt, spu_rlqwbyte(gt, 4), spu_cmpeq((vec_uint4)x, (vec_uint4)y));
return (vec_ullong2)spu_shuffle(gt, gt, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vector unsigned long long
__ll_spu_sub(vector unsigned long long x, vector unsigned long long y)
{
vec_uint4 borrow;
borrow = spu_genb((vec_uint4)x, (vec_uint4)y);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_ullong2)spu_subx((vec_uint4)x, (vec_uint4)y, borrow);
}
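/* Illustrative sketch (editor's addition, not part of the original header):
 * the helpers above combine into the core step of a 64-bit long division,
 * conditionally subtracting the shifted denominator whenever it does not
 * exceed the remaining numerator.
 */
static inline vector unsigned long long
__ll_div_step_sketch(vector unsigned long long numer, vector unsigned long long denom_shifted)
{
vec_ullong2 gt = __ll_spu_cmpgt(denom_shifted, numer); // denom_shifted > numer, per 64-bit slot
return spu_sel(__ll_spu_sub(numer, denom_shifted), numer, gt); // keep numer where denom is greater
}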
#endif // ___SIMD_MATH_LLDIV_H___

View File

@@ -0,0 +1,84 @@
/* Internal helper routines for the vector double remainder functions.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__REMAINDER_H___
#define ___SIMD_MATH__REMAINDER_H___
#include <simdmath/_vec_utils.h>
/*
* multiply a double-formatted value by two (x * 2)
*/
static inline vec_uint4
__rem_twice_d(vec_uint4 aa)
{
vec_uint4 norm = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL))); // exp > 0
norm = spu_shuffle(norm, norm, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// if denorm or zero: shift left by 1; if normal: exponent + 1
return spu_sel(spu_slqw(aa, 1), spu_add(aa, (vec_uint4)(spu_splats(0x0010000000000000ULL))), norm); // x2
}
/*
* subtraction function under limited conditions
*/
static inline vec_uint4
__rem_sub_d(vec_uint4 aa, vec_uint4 bb)
{
// determine which input is bigger, aa or bb
vec_uint4 is_bigb = __vec_gt64(bb, aa); // bb > aa
// is a denormal calculation needed?
vec_uint4 norm_a, norm_b;
norm_a = spu_cmpgt(aa, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_b = spu_cmpgt(bb, (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL)));
norm_a = spu_and(norm_a, norm_b);
norm_a = spu_shuffle(norm_a, norm_a,((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
// calc (aa - bb) and (bb - aa)
vec_uint4 res_a, res_b, res;
vec_uint4 borrow_a, borrow_b;
vec_uchar16 mask_b = ((vec_uchar16){4,5,6,7,192,192,192,192,12,13,14,15,192,192,192,192});
borrow_a = spu_genb(aa, bb);
borrow_b = spu_genb(bb, aa);
borrow_a = spu_shuffle(borrow_a, borrow_a, mask_b);
borrow_b = spu_shuffle(borrow_b, borrow_b, mask_b);
res_a = spu_subx(aa, bb, borrow_a);
res_b = spu_subx(bb, aa, borrow_b);
res_b = spu_or(res_b, ((vec_uint4){0x80000000,0,0x80000000,0})); // set sign
res = spu_sel(res_a, res_b, is_bigb); // select (aa - bb) or (bb - aa)
// select normal calc or special
res = spu_sel(res, (vec_uint4)spu_sub((vec_double2)aa, (vec_double2)bb), norm_a);
return res;
}
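/* Editor's illustrative check (hypothetical helper, not in the library):
 * for a normal double, adding one to the biased exponent doubles the value,
 * so __rem_twice_d applied to the bit pattern of 1.0 yields 2.0.
 */
static inline vec_uint4
__rem_twice_d_sketch(void)
{
vec_uint4 one = (vec_uint4)spu_splats(0x3FF0000000000000ULL); // {1.0, 1.0}
return __rem_twice_d(one); // bit pattern 0x4000000000000000, i.e. {2.0, 2.0}
}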
#endif

View File

@@ -0,0 +1,57 @@
/* Common types for SPU SIMD Math Library
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH__VEC_UTILS_H___
#define ___SIMD_MATH__VEC_UTILS_H___
/*
* extend the spu_cmpgt function to 64-bit data
*/
static inline vec_uint4
__vec_gt64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt = spu_cmpgt(aa, bb); // aa > bb
vec_uint4 eq = spu_cmpeq(aa, bb); // aa = bb
return spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4))); // result valid only in the high word of each doubleword
}
static inline vec_uint4
__vec_gt64(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 gt_hi = __vec_gt64_half(aa, bb); // result valid only in the high word
return spu_shuffle(gt_hi, gt_hi, ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11}));
}
static inline vec_uint4
__vec_eq64_half(vec_uint4 aa, vec_uint4 bb)
{
vec_uint4 eq = spu_cmpeq(aa, bb);
return spu_and(eq, spu_shuffle(eq, eq, ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11})));
}
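/* Editor's sketch (hypothetical helper, not in the library): the remaining
 * 64-bit orderings follow directly from the primitives above, e.g.
 */
static inline vec_uint4
__vec_lt64_sketch(vec_uint4 aa, vec_uint4 bb)
{
return __vec_gt64(bb, aa); // aa < bb  <=>  bb > aa
}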
#endif

View File

@@ -0,0 +1,44 @@
/* absi4 - for each of four integer slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ABSI4_H___
#define ___SIMD_MATH_ABSI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed int
_absi4 (vector signed int x)
{
vec_int4 neg;
neg = spu_sub( 0, x );
return spu_sel( neg, x, spu_cmpgt( x, -1 ) );
}
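/* Editor's usage sketch (hypothetical, not in the library). Note that, as
 * with scalar abs, INT_MIN has no positive counterpart and maps to itself.
 */
static inline vector signed int
_absi4_example(void)
{
vec_int4 v = (vec_int4){-5, 7, -9, 0};
return _absi4(v); // {5, 7, 9, 0}
}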
#endif

View File

@@ -0,0 +1,82 @@
/* acosf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ACOSF4_H___
#define ___SIMD_MATH_ACOSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtf4.h>
//
// Computes the inverse cosine of all four slots of x
//
static inline vector float
_acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
xabs = (vec_float4)(spu_rlmask(spu_sl((vec_uint4)(x), 1), -1));
select = (vec_uint4)(spu_rlmaska((vector signed int)(x), -31));
t1 = _sqrtf4(spu_sub( spu_splats(1.0f), xabs));
/* Instruction counts could be reduced if the polynomial were
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = spu_mul(xabs, xabs);
xabs4 = spu_mul(xabs2, xabs2);
hi = spu_madd(spu_splats(-0.0012624911f), xabs, spu_splats(0.0066700901f));
hi = spu_madd(hi, xabs, spu_splats(-0.0170881256f));
hi = spu_madd(hi, xabs, spu_splats( 0.0308918810f));
lo = spu_madd(spu_splats(-0.0501743046f), xabs, spu_splats(0.0889789874f));
lo = spu_madd(lo, xabs, spu_splats(-0.2145988016f));
lo = spu_madd(lo, xabs, spu_splats( 1.5707963050f));
result = spu_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
*/
neg = spu_nmsub(t1, result, spu_splats(3.1415926535898f));
pos = spu_mul(t1, result);
result = spu_sel(pos, neg, select);
return result;
}
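/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector float
_acosf4_example(void)
{
vec_float4 v = (vec_float4){1.0f, 0.0f, -1.0f, 0.5f};
return _acosf4(v); // approximately {0, pi/2, pi, pi/3}
}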
#endif

View File

@@ -0,0 +1,92 @@
/* asinf4 - Computes the inverse sine of all four slots of x
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ASINF4_H___
#define ___SIMD_MATH_ASINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtf4.h>
#include <simdmath/divf4.h>
static inline vector float
_asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uint4 positive = spu_cmpgt(x,spu_splats(0.0f));
// gtHalf = (|x| > 0.5)
//
vec_uint4 gtHalf = spu_cmpabsgt(x,spu_splats(0.5f));
// x = absf(x)
//
x = (vec_float4)spu_and((vec_int4)x,spu_splats((int)0x7fffffff));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = spu_sel(spu_mul(x,x),spu_madd(spu_splats(-0.5f),x,spu_splats(0.5f)),gtHalf);
x = spu_sel(x,spu_mul(spu_splats(-2.0f),_sqrtf4(g)),gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = spu_add(g,spu_splats(-5.54846723f));
vec_float4 num = spu_madd(spu_splats(-0.504400557f),g,spu_splats(0.933933258f));
denom = spu_madd(denom,g,spu_splats(5.603603363f));
num = spu_mul(spu_mul(x,g),num);
// x = x + num / denom
//
x = spu_add(x,_divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = spu_sel(x,spu_add(x,spu_splats(1.57079632679489661923f)),gtHalf);
// if (!positive) x = -x
//
x = spu_sel((vec_float4)spu_xor(spu_splats((int)0x80000000),(vec_int4)x),x,positive);
return x;
}
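/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector float
_asinf4_example(void)
{
vec_float4 v = (vec_float4){0.0f, 0.5f, 1.0f, -1.0f};
return _asinf4(v); // approximately {0, pi/6, pi/2, -pi/2}
}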
#endif

View File

@@ -0,0 +1,66 @@
/* atan2f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATAN2F4_H___
#define ___SIMD_MATH_ATAN2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/atanf4.h>
#include <simdmath/divf4.h>
//
// Inverse tangent function of two variables
//
static inline vector float
_atan2f4 (vector float y, vector float x)
{
vec_float4 res = _atanf4(_divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uint4 yNeg = spu_cmpgt(spu_splats(0.0f),y);
vec_uint4 xNeg = spu_cmpgt(spu_splats(0.0f),x);
vec_float4 bias = spu_sel(spu_splats(3.14159265358979323846f),spu_splats(-3.14159265358979323846f),yNeg);
vec_float4 newRes = spu_add(bias, res);
res = spu_sel(res,newRes,xNeg);
return res;
}
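/* Editor's usage sketch (hypothetical, not in the library), covering all
 * four quadrants of the bias selection above:
 */
static inline vector float
_atan2f4_example(void)
{
vec_float4 y = (vec_float4){ 1.0f, 1.0f, -1.0f, -1.0f};
vec_float4 x = (vec_float4){ 1.0f, -1.0f, -1.0f, 1.0f};
return _atan2f4(y, x); // approximately {pi/4, 3*pi/4, -3*pi/4, -pi/4}
}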
#endif

View File

@@ -0,0 +1,81 @@
/* atanf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATANF4_H___
#define ___SIMD_MATH_ATANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/recipf4.h>
//
// Computes the inverse tangent of all four slots of x.
//
static inline vector float
_atanf4 (vector float x)
{
vec_float4 bias;
vec_float4 x2, x3, x4, x8, x9;
vec_float4 hi, lo;
vec_float4 result;
vec_float4 inv_x;
vec_uint4 sign;
vec_uint4 select;
sign = spu_sl(spu_rlmask((vec_uint4)x, -31), 31);
inv_x = _recipf4(x);
inv_x = (vec_float4)spu_xor((vec_uint4)inv_x, spu_splats(0x80000000u));
select = (vec_uint4)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vec_float4)spu_or(sign, (vec_uint4)(spu_splats(1.57079632679489661923f)));
bias = (vec_float4)spu_and((vec_uint4)bias, select);
x = spu_sel(x, inv_x, select);
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
return result;
}
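/* Editor's usage sketch (hypothetical, not in the library); the last slot
 * exercises the reciprocal-plus-bias path taken when |x| > 1:
 */
static inline vector float
_atanf4_example(void)
{
vec_float4 v = (vec_float4){0.0f, 1.0f, -1.0f, 1000000.0f};
return _atanf4(v); // approximately {0, pi/4, -pi/4, ~pi/2}
}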
#endif

View File

@@ -0,0 +1,95 @@
/* cbrtf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CBRTF4_H___
#define ___SIMD_MATH_CBRTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/frexpf4.h>
#include <simdmath/ldexpf4.h>
#include <simdmath/divf4.h>
static inline vec_int4
__cbrtf4_calc_quot(vec_int4 n)
{
vec_int4 quot;
vec_uint4 negxexpmask = spu_cmpgt(spu_splats(0), n);
n = spu_sel(n, spu_add(n,2), negxexpmask);
quot = spu_add(spu_rlmaska(n,-2), spu_rlmaska(n,-4));
quot = spu_add(quot, spu_rlmaska(quot, -4));
quot = spu_add(quot, spu_rlmaska(quot, -8));
quot = spu_add(quot, spu_rlmaska(quot,-16));
vec_int4 r = spu_sub(spu_sub(n,quot), spu_sl(quot,1));
quot = spu_add(quot, spu_rlmaska(spu_add(spu_add(r,5), spu_sl (r,2)), -4));
return quot;
}
#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3)
#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
static inline vector float
_cbrtf4 (vector float x)
{
vec_float4 zeros = spu_splats(0.0f);
vec_uint4 zeromask = spu_cmpeq(x, zeros);
vec_int4 xexp;
vec_float4 sgnmask = (vec_float4)spu_splats(0x7FFFFFFF);
vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
x = spu_and(x, sgnmask);
x = _frexpf4(x, &xexp);
vec_float4 p = spu_madd(
spu_madd(x, spu_splats(-0.191502161678719066f), spu_splats(0.697570460207922770f)),
x,
spu_splats(0.492659620528969547f)
);
vec_float4 p3 = spu_mul(p, spu_mul(p, p));
vec_int4 quot = __cbrtf4_calc_quot(xexp);
vec_int4 modval = spu_sub(spu_sub(xexp,quot), spu_sl(quot,1)); // mod = xexp - 3*quotient
vec_float4 factor = spu_splats((float)(1.0/__CBRTF_sqr_cbrt2));
factor = spu_sel(factor, spu_splats((float)(1.0/__CBRTF_cbrt2)), spu_cmpeq(modval,-1));
factor = spu_sel(factor, spu_splats((float)( 1.0)), spu_cmpeq(modval, 0));
factor = spu_sel(factor, spu_splats((float)( __CBRTF_cbrt2)), spu_cmpeq(modval, 1));
factor = spu_sel(factor, spu_splats((float)(__CBRTF_sqr_cbrt2)), spu_cmpeq(modval, 2));
vec_float4 pre = spu_mul(p, factor);
vec_float4 numr = spu_madd(x , spu_splats(2.0f), p3);
vec_float4 denr = spu_madd(p3, spu_splats(2.0f), x );
vec_float4 res = spu_mul(pre, _divf4(numr, denr));
res = _ldexpf4(res, quot);
return spu_sel(spu_sel(res, spu_orc(res,sgnmask), negmask),
zeros,
zeromask);
}
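/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector float
_cbrtf4_example(void)
{
vec_float4 v = (vec_float4){8.0f, 27.0f, -8.0f, 0.0f};
return _cbrtf4(v); // approximately {2, 3, -2, 0}
}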
#endif

View File

@@ -0,0 +1,99 @@
/* ceild2 - for each of two double slots, round up to the smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILD2_H___
#define ___SIMD_MATH_CEILD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_ceild2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following components
* based upon the inputs:
*
* mask = bits of the input that need to be replaced.
* insert = value of the bits that need to be replaced.
* addend = value to be added to perform the function.
*
* These are applied as follows:
*
* out = ((in & mask) | insert) + addend
*/
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -31.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) && (in >= 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this also works
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_and(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel(spu_splats((unsigned int)0x0), (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_and( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert = spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
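/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector double
_ceild2_example(void)
{
vec_double2 v = (vec_double2){-1.5, 2.25};
return _ceild2(v); // {-1.0, 3.0}
}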
#endif

View File

@@ -0,0 +1,58 @@
/* ceilf4 - for each of four float slots, round up to the smallest integer not less than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILF4_H___
#define ___SIMD_MATH_CEILF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_ceilf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one greater.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, 1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is less than input, add one.
return spu_sel( truncated, truncated1, spu_cmpgt( x, truncated ) );
}
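/* Editor's usage sketch (hypothetical, not in the library); values at or
 * above 2^23 are already integral and pass through the inrange select:
 */
static inline vector float
_ceilf4_example(void)
{
vec_float4 v = (vec_float4){-1.5f, 2.25f, 1.0e10f, -0.5f};
return _ceilf4(v); // {-1.0f, 3.0f, 1.0e10f, 0.0f}
}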
#endif

View File

@@ -0,0 +1,43 @@
/* copysignd2 - for each of two double slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGND2_H___
#define ___SIMD_MATH_COPYSIGND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_copysignd2 (vector double x, vector double y)
{
return spu_sel( x, y, spu_splats(0x8000000000000000ull) );
}
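/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector double
_copysignd2_example(void)
{
return _copysignd2((vec_double2){2.0, -3.0}, (vec_double2){-1.0, 1.0}); // {-2.0, 3.0}
}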
#endif

View File

@@ -0,0 +1,43 @@
/* copysignf4 - for each of four float slots, return value with magnitude from x and sign from y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGNF4_H___
#define ___SIMD_MATH_COPYSIGNF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_copysignf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_splats(0x80000000) );
}
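/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector float
_copysignf4_example(void)
{
return _copysignf4((vec_float4){2.0f, -3.0f, 4.0f, -5.0f},
(vec_float4){-1.0f, 1.0f, -0.0f, 0.0f}); // {-2.0f, 3.0f, -4.0f, 5.0f}
}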
#endif

View File

@@ -0,0 +1,46 @@
/* cosd2 - Computes the cosine of each of two double slots.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSD2_H___
#define ___SIMD_MATH_COSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosd2.h>
static inline vector double
_cosd2 (vector double x)
{
vec_double2 s, c;
_sincosd2(x, &s, &c);
return c;
}
#endif

View File

@@ -0,0 +1,46 @@
/* cosf4 - Computes the cosine of each of the four slots by using a polynomial approximation
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSF4_H___
#define ___SIMD_MATH_COSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sincosf4.h>
static inline vector float
_cosf4 (vector float x)
{
vec_float4 s, c;
_sincosf4(x, &s, &c);
return c;
}
#endif

View File

@@ -0,0 +1,47 @@
/* divd2 - for each of two double slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVD2_H___
#define ___SIMD_MATH_DIVD2_H___
// Equal to numer * recipd2(denom)
// See recipd2 for results of special values.
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/recipd2.h>
static inline vector double
_divd2 (vector double numer, vector double denom)
{
return spu_mul( numer, _recipd2( denom ) );
}
#endif

View File

@@ -0,0 +1,50 @@
/* divf4 - for each of four float slots, divide numer by denom.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVF4_H___
#define ___SIMD_MATH_DIVF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and one Newton-Raphson iteration.
// Uses a constant of 1.0 + 1 ulp to improve accuracy.
vector float y0, y0numer;
vector float oneish = (vector float)spu_splats(0x3f800001);
y0 = spu_re( denom );
y0numer = spu_mul( numer, y0 );
return spu_madd( spu_nmsub( denom, y0, oneish ), y0numer, y0numer );
}
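/* Editor's note (illustrative, not in the library): the madd/nmsub pair
 * above is one Newton-Raphson refinement of the reciprocal estimate,
 * folded with the numerator. Scalar equivalent, assuming y0 ~ 1/denom:
 */
static inline float
_div_newton_raphson_scalar_sketch(float numer, float denom, float y0)
{
float y0numer = numer * y0;
return (1.0f - denom * y0) * y0numer + y0numer; // numer*y0*(2 - denom*y0)
}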
#endif

View File

@@ -0,0 +1,67 @@
/* divi4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVI4_H___
#define ___SIMD_MATH_DIVI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divu4.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline divi4_t
_divi4 (vector signed int numer, vector signed int denom)
{
divu4_t resAbs;
divi4_t res;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
// Determine whether result needs sign change
numerPos = spu_cmpgt( numer, -1 );
denomPos = spu_cmpgt( denom, -1 );
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_uint4)spu_sel( spu_sub( 0, numer ), numer, numerPos );
denomAbs = (vec_uint4)spu_sel( spu_sub( 0, denom ), denom, denomPos );
resAbs = _divu4(numerAbs, denomAbs);
res.quot = spu_sel( (vec_int4)resAbs.quot, spu_sub( 0, (vec_int4)resAbs.quot ), quotNeg );
res.rem = spu_sel( spu_sub( 0, (vec_int4)resAbs.rem ), (vec_int4)resAbs.rem, numerPos );
return res;
}
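/* Editor's usage sketch (hypothetical, not in the library); as in C, the
 * quotient truncates toward zero and the remainder takes the numerator's sign:
 */
static inline divi4_t
_divi4_example(void)
{
vec_int4 n = (vec_int4){ 7, -7, 7, -7};
vec_int4 d = (vec_int4){ 2, 2, -2, -2};
return _divi4(n, d); // quot {3, -3, -3, 3}, rem {1, -1, 1, -1}
}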
#endif

View File

@@ -0,0 +1,102 @@
/* divu4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVU4_H___
#define ___SIMD_MATH_DIVU4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
// divu4 - for each of four unsigned integer slots, compute quotient and remainder of numer/denom
// and store in divu4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline divu4_t
_divu4 (vector unsigned int numer, vector unsigned int denom)
{
divu4_t res;
vec_int4 shift;
vec_uint4 quot, newQuot;
vec_uint4 denomZeros, numerZeros, denomLeft, oneLeft, denomShifted, oneShifted;
vec_uint4 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
// Any possible negative value will be interpreted as a shift > 31
denomZeros = spu_cntlz( denom );
numerZeros = spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = spu_sl( denom, (vec_uint4)shift );
oneShifted = spu_sl( spu_splats(1U), (vec_uint4)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0U), spu_cmpeq( denom, 0 ) );
// Shift left all leading zeros.
denomLeft = spu_sl( denom, denomZeros );
oneLeft = spu_sl( spu_splats(1U), denomZeros );
quot = spu_splats(0U);
do
{
cont = spu_cmpgt( oneShifted, 0U );
anyCont = spu_extract( spu_gather( cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = spu_cmpgt( denomShifted, numer );
newNum = spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = spu_sel( spu_cntlz( newNum ), numerZeros, skip );
shift = (vec_int4)spu_sub( skip, numerZeros );
oneShifted = spu_rlmask( oneLeft, shift );
denomShifted = spu_rlmask( denomLeft, shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
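/* Editor's usage sketch (hypothetical, not in the library), including the
 * documented divide-by-zero behavior:
 */
static inline divu4_t
_divu4_example(void)
{
vec_uint4 n = spu_splats(100U);
vec_uint4 d = (vec_uint4){7U, 10U, 100U, 0U};
return _divu4(n, d); // quot {14, 10, 1, 0}, rem {2, 0, 0, 100}
}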
#endif

View File

@@ -0,0 +1,135 @@
/* exp2f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXP2F4_H___
#define ___SIMD_MATH_EXP2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/*
* FUNCTION
* vec_float4 _exp2f4(vec_float4 x)
*
* DESCRIPTION
* _exp2f4 computes 2 raised to the input vector x. Computation is
* performed by observing that 2^(a+b) = 2^a * 2^b.
* We decompose x into a and b (above) by letting
* a = ceil(x), b = x - a;
*
* 2^a is easily computed by placing a into the exponent
* of a floating point number whose mantissa is all zeros.
*
* 2^b is computed using the following polynomial approximation.
* (C. Hastings, Jr, 1955).
*
*            __7__
*            \
*             \
* 2^(-x) =    /    Ci*x^i
*            /____
*             i=0
*
* for x in the range 0.0 to 1.0
*
* C0 = 1.0
* C1 = -0.9999999995
* C2 = 0.4999999206
* C3 = -0.1666653019
* C4 = 0.0416573475
* C5 = -0.0083013598
* C6 = 0.0013298820
* C7 = -0.0001413161
*
* This function does not handle out of range conditions. It
* assumes that x is in the range (-128.0, 127.0]. Values outside
* this range will produce undefined results.
*/
#define __EXP2F_LN2 0.69314718055995f /* ln(2) */
static inline vector float
_exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow, underflow;
vec_float4 frac, frac2, frac4;
vec_float4 exp_int, exp_frac;
vec_float4 result;
vec_float4 hi, lo;
vec_float4 bias;
/* Break the input x into two parts: ceil(x) and x - ceil(x).
*/
bias = (vec_float4)(spu_rlmaska((vec_int4)(x), -31));
bias = (vec_float4)(spu_andc(spu_splats(0x3F7FFFFFu), (vec_uint4)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats(__EXP2F_LN2));
// !!! HRD Replacing unclear and incorrect overflow handling code
//overflow = spu_sel((vec_uint4)spu_splats(0x7FFFFFFF), (vec_uint4)x, (vec_uchar16)spu_splats(0x80000000));
overflow = spu_cmpgt(x, (vec_float4)spu_splats(0x4300FFFFu)); // !!! Biggest possible exponent to fit in range.
underflow = spu_cmpgt(spu_splats(-126.0f), x);
//exp_int = (vec_float4)(spu_sl(spu_add(ix, 127), 23)); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(spu_sl(spu_add(ix, 126), 23)); // !!! HRD <- add with saturation
exp_int = spu_add(exp_int, exp_int); // !!! HRD
/* Instruction counts could be reduced if the polynomial were
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
frac2 = spu_mul(frac, frac);
frac4 = spu_mul(frac2, frac2);
hi = spu_madd(frac, spu_splats(-0.0001413161f), spu_splats(0.0013298820f));
hi = spu_madd(frac, hi, spu_splats(-0.0083013598f));
hi = spu_madd(frac, hi, spu_splats(0.0416573475f));
lo = spu_madd(frac, spu_splats(-0.1666653019f), spu_splats(0.4999999206f));
lo = spu_madd(frac, lo, spu_splats(-0.9999999995f));
lo = spu_madd(frac, lo, spu_splats(1.0f));
exp_frac = spu_madd(frac4, hi, lo);
ix = spu_add(ix, spu_rlmask((vec_int4)(exp_frac), -23));
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_sel(result, (vec_float4)spu_splats(0x7FFFFFFF), overflow);
result = spu_sel(result, (vec_float4)spu_splats(0), underflow);
//result = spu_sel(result, (vec_float4)(overflow), spu_cmpgt((vec_uint4)(ix), 255));
return (result);
}
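/* Editor's usage sketch (hypothetical, not in the library):
 */
static inline vector float
_exp2f4_example(void)
{
vec_float4 v = (vec_float4){0.0f, 1.0f, -1.0f, 10.0f};
return _exp2f4(v); // approximately {1, 2, 0.5, 1024}
}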
#endif

View File

@@ -0,0 +1,70 @@
/* expf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPF4_H___
#define ___SIMD_MATH_EXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#include <simdmath/ldexpf4.h>
#define __EXPF_C1 -0.6931470632553101f
#define __EXPF_C2 -1.1730463525082e-7f
#define __EXPF_INVLN2 1.4426950408889634f
static inline vector float
_expf4 (vector float x)
{
vec_uint4 xnegmask = spu_cmpgt(spu_splats(0.0f), x);
vec_float4 goffset = spu_sel(spu_splats(0.5f),spu_splats(-0.5f),xnegmask);
vec_float4 g = spu_mul(x, spu_splats(__EXPF_INVLN2));
vec_int4 xexp = spu_convts(spu_add(g, goffset),0);
g = spu_convtf(xexp, 0);
g = spu_madd(g, spu_splats(__EXPF_C2), spu_madd(g, spu_splats(__EXPF_C1), x));
vec_float4 z = spu_mul(g, g);
vec_float4 a = spu_mul(z, spu_splats(0.0999748594f));
vec_float4 b = spu_mul(g,
spu_madd(z,
spu_splats(0.0083208258f),
spu_splats(0.4999999992f)
)
);
vec_float4 foo = _divf4(spu_add(spu_splats(1.0f), spu_add(a, b)),
spu_add(spu_splats(1.0f), spu_sub(a, b)));
return _ldexpf4(foo, xexp);
}
#endif
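A scalar sketch of the same reduction may help (exp_ref is an illustrative name; this is a model of the scheme, not the library's code). __EXPF_C1 and __EXPF_C2 are the high and low halves of -ln 2, so g = x - n*ln 2 is accumulated in two steps to keep the reduction error small; the vector code then uses a small rational approximation of e^g instead of calling expf.

#include <math.h>

/* Illustrative only: n = round(x / ln 2), g accumulated with the split
   constant -ln 2 == __EXPF_C1 + __EXPF_C2, result recombined as 2^n * e^g. */
static float exp_ref(float x)
{
  int n = (int)(x * 1.4426950408889634f + (x < 0.0f ? -0.5f : 0.5f));
  float g = x + (float)n * -0.6931470632553101f;  /* high part of -ln 2 */
  g += (float)n * -1.1730463525082e-7f;           /* low-order correction */
  return ldexpf(expf(g), n);
}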

View File

@@ -0,0 +1,62 @@
/* expm1f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPMLF4_H___
#define ___SIMD_MATH_EXPMLF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/expf4.h>
#include <simdmath/divf4.h>
#define __EXPM1F_ln1by2 -0.6931471805599f
#define __EXPM1F_ln3by2 0.4054651081082f
static inline vector float
_expm1f4 (vector float x)
{
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(__EXPM1F_ln1by2)),
spu_cmpgt(spu_splats(__EXPM1F_ln3by2), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats(-0.3203561199f), spu_splats(0.9483177697f));
d1 = spu_madd(x2, spu_splats(0.0326527809f), d0);
n0 = spu_madd(x , spu_splats(0.1538026623f), spu_splats(0.9483177732f));
n1 = spu_madd(x , spu_splats(0.0024490478f), spu_splats(0.0305274668f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(spu_sub(_expf4(x), spu_splats(1.0f)),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
#endif
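The nearzeromask branch exists because expf(x) - 1.0f cancels catastrophically for small x. A minimal host-side demonstration, illustrative only (assumes a libm providing expm1f):

#include <math.h>
#include <stdio.h>

int main(void)
{
  float x = 1e-7f;
  /* The naive form rounds through 1.0f and keeps only one mantissa step: */
  printf("expf(x)-1 : %.9g\n", expf(x) - 1.0f);  /* ~1.19209290e-07, wrong */
  printf("expm1f(x) : %.9g\n", expm1f(x));       /* ~1.00000000e-07, right */
  return 0;
}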

View File

@@ -0,0 +1,42 @@
/* fabsd2 - for each of two double slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSD2_H___
#define ___SIMD_MATH_FABSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_fabsd2 (vector double x)
{
return (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -0,0 +1,42 @@
/* fabsf4 - for each of 4 float slots, compute absolute value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSF4_H___
#define ___SIMD_MATH_FABSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_fabsf4 (vector float x)
{
return (vec_float4)spu_andc( (vec_uint4)x, spu_splats(0x80000000) );
}
#endif
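The cast-and-mask above is the vector form of a classic scalar idiom; for comparison, a sketch (fabs_ref is an illustrative name):

#include <stdint.h>
#include <string.h>

/* Illustrative scalar equivalent: IEEE-754 fabs is just clearing bit 31
   of the encoding -- no branch, no compare. */
static float fabs_ref(float x)
{
  uint32_t u;
  memcpy(&u, &x, sizeof u);   /* type-pun safely through memcpy */
  u &= 0x7FFFFFFFu;           /* drop the sign bit */
  memcpy(&x, &u, sizeof x);
  return x;
}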

View File

@@ -0,0 +1,51 @@
/* fdimd2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMD2_H___
#define ___SIMD_MATH_FDIMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/* fdim_v - compute the positive difference of x and y.
*/
static inline vector double
_fdimd2 (vector double x, vector double y)
{
vec_double2 v;
vec_uint4 mask;
v = spu_sub(x, y);
// Splat each slot's sign byte across its 64-bit lane ...
mask = (vec_uint4)spu_shuffle(v, v, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
// ... then arithmetic-shift it to an all-ones mask for negative
// differences and clear those slots to +0.0.
v = spu_andc(v, (vec_double2)spu_rlmaska(mask, -31));
return (v);
}
#endif

View File

@@ -0,0 +1,43 @@
/* fdimf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMF4_H___
#define ___SIMD_MATH_FDIMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_fdimf4 (vector float x, vector float y)
{
vec_float4 diff = spu_sub(x,y);
return spu_sel(spu_splats(0.0f),diff, spu_cmpgt(x,y));
}
#endif

View File

@@ -0,0 +1,99 @@
/* floord2 - for each of two double slots, round down to the largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORD2_H___
#define ___SIMD_MATH_FLOORD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_floord2(vector double in)
{
vec_uchar16 swap_words = ((vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11});
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 one = ((vec_uint4){0, 1, 0, 1});
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0, e_0, e_00, e_sign, exp_ge0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 one_d = spu_splats((double)1.0);
vec_uint4 zero = spu_splats((unsigned int)0x0);
/* This function generates the following components
 * based upon the inputs.
 *
 * mask = bits of the input that need to be replaced.
 * insert = value of the bits that need to be replaced.
 * addend = value to be added to perform the function.
 *
 * These are applied as follows:
 *
 * out = ((in & ~mask) | insert) + addend
 */
in_hi = spu_shuffle(in, in, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4){1023, 1043, 1023, 1043}), exp);
/* clamp shift to the range 0 to -32.
*/
shift = spu_sel(spu_splats((int)-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4){0xFFFFF, -1, 0xFFFFF, -1}), shift);
exp_ge0 = spu_cmpgt(exp, 0x3FE);
mask = spu_orc(frac_mask, exp_ge0);
/* addend = ((in & mask) != 0 && (in < 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
pos = spu_cmpgt((vec_int4)in_hi, -1);
//pos = spu_cmpgt((vec_int4)in_hi, 0x0); // this also works
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
/* insert
*/
e_0 = spu_cmpeq(spu_andc((vec_uint4)in, (vec_uint4)sign), zero);
e_00 = spu_and(e_0, spu_shuffle(e_0, e_0, swap_words));
// e_sign = spu_sel((vec_uint4)one_d, zero, spu_cmpeq( spu_and((vec_uint4)in_hi, spu_splats((unsigned int)0x80000000)), zero));
e_sign = spu_andc( (vec_uint4)one_d, spu_cmpeq( spu_and((vec_uint4)in_hi,spu_splats((unsigned int)0x80000000)), zero));
insert =spu_andc(spu_andc(e_sign, e_00), exp_ge0);
/* replace insert
*/
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
/* in + addend
*/
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
#endif
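One hand-traced slot makes the mask/insert/addend scheme concrete (worked by hand, so treat as illustrative):

/* Hand trace for in = -1.5 (bits 0xBFF8000000000000, biased exponent 0x3FF):
 *   shift = 1023 - 1023 = 0, so frac_mask = 0x000FFFFFFFFFFFFF
 *   exp_ge0 holds, so mask = frac_mask and insert = 0
 *   (in & mask) != 0 and in < 0, so addend = mask + 1 = 0x0010000000000000
 *   (in & ~mask) | insert = 0xBFF0000000000000                     (-1.0)
 *   adding addend carries into the exponent field:
 *   0xBFF0000000000000 + 0x0010000000000000 = 0xC000000000000000 = -2.0
 * and floor(-1.5) == -2.0, as required.
 */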

View File

@@ -0,0 +1,58 @@
/* floorf4 - for each of four float slots, round down to largest integer not greater than the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORF4_H___
#define ___SIMD_MATH_FLOORF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_floorf4 (vector float x)
{
vec_int4 xi, xi1;
vec_uint4 inrange;
vec_float4 truncated, truncated1;
// Find truncated value and one less.
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
xi1 = spu_add( xi, -1 );
truncated = spu_sel( x, spu_convtf( xi, 0 ), inrange );
truncated1 = spu_sel( x, spu_convtf( xi1, 0 ), inrange );
// If truncated value is greater than input, subtract one.
return spu_sel( truncated, truncated1, spu_cmpgt( truncated, x ) );
}
#endif
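The inrange constant 0x4B000000 is 2^23 as a float. A small check of why that threshold works, illustrative only (check_threshold is a made-up name):

#include <assert.h>

/* Illustrative: from 2^23 upward the spacing between floats is >= 1.0,
   so every such value is already an integer and floor(x) == x; those
   slots fail inrange and pass through unchanged, which also avoids the
   limited range of the float->int conversion. */
static void check_threshold(void)
{
  float big = 8388608.0f;           /* 2^23, the 0x4B000000 constant */
  assert(big + 0.5f == big);        /* 0.5 is below the local spacing */
  assert(big == (float)(long)big);  /* already integral */
}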

View File

@@ -0,0 +1,42 @@
/* fmad2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAD2_H___
#define ___SIMD_MATH_FMAD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_fmad2 (vector double x, vector double y, vector double z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -0,0 +1,42 @@
/* fmaf4
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAF4_H___
#define ___SIMD_MATH_FMAF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_fmaf4 (vector float x, vector float y, vector float z)
{
return spu_madd(x,y,z);
}
#endif

View File

@@ -0,0 +1,71 @@
/* fmaxd2 - for each of two double slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXD2_H___
#define ___SIMD_MATH_FMAXD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the maximum numeric value of their arguments. If one argument
* is a NaN, fmax returns the other value. If both are NaNs, then a NaN
* is returned.
*/
static inline vector double
_fmaxd2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set the exponent to 0x3ff
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as max
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(x_offset, y_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}
#endif
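A hedged usage sketch of the NaN rule documented above (fmax_nan_demo is a made-up name; assumes an SPU toolchain and the quiet NaN from <math.h>):

#include <math.h>
#include <spu_intrinsics.h>

/* Illustrative: each NaN slot defers to the numeric slot of the other
   operand, per the comment above. */
static void fmax_nan_demo(void)
{
  vec_double2 x = (vec_double2){ NAN, 1.0 };
  vec_double2 y = (vec_double2){ 2.0, NAN };
  vec_double2 r = _fmaxd2(x, y);   /* expected: { 2.0, 1.0 } */
  (void)r;
}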

View File

@@ -0,0 +1,43 @@
/* fmaxf4 - for each of four float slots, compute maximum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXF4_H___
#define ___SIMD_MATH_FMAXF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_fmaxf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( y, x ) );
}
#endif

View File

@@ -0,0 +1,71 @@
/* fmind2 - for each of two double slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMIND2_H___
#define ___SIMD_MATH_FMIND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
/* Return the minimum numeric value of their arguments. If one argument
* is a NaN, fmin returns the other value. If both are NaNs, then a NaN
* is returned.
*/
static inline vector double
_fmind2 (vector double x, vector double y)
{
vec_ullong2 selector, denorm;
vec_double2 x_offset, y_offset, diff;
vec_uint4 nan_x, abs_x, gt, eq;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uint4 infinity = (vec_uint4){0x7FF00000, 0, 0x7FF00000, 0};
vec_uint4 exp0 = (vec_uint4){0x3FF00000, 0, 0x3FF00000, 0};
/* If both x and y are denorm or zero, then set the exponent to 0x3ff
*/
denorm = (vec_ullong2)spu_cmpeq(spu_and((vec_uint4)spu_or(x, y), infinity), 0);
x_offset = spu_sel(x, spu_or(x, (vec_double2)exp0), denorm);
y_offset = spu_sel(y, spu_or(y, (vec_double2)exp0), denorm);
/* If x is a NaN, then select y as min
*/
abs_x = spu_andc((vec_uint4)x, sign);
gt = spu_cmpgt(abs_x, infinity);
eq = spu_cmpeq(abs_x, infinity);
nan_x = spu_or(gt, spu_and(eq, spu_rlqwbyte(gt, 4)));
diff = spu_sub(y_offset, x_offset);
selector = (vec_ullong2)spu_orc(nan_x, spu_cmpgt((vec_int4)diff, -1));
selector = spu_shuffle(selector, selector, ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}));
return spu_sel(x, y, selector);
}
#endif

View File

@@ -0,0 +1,43 @@
/* fminf4 - for each of four float slots, compute minimum of x and y
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMINF4_H___
#define ___SIMD_MATH_FMINF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_fminf4 (vector float x, vector float y)
{
return spu_sel( x, y, spu_cmpgt( x, y ) );
}
#endif

View File

@@ -0,0 +1,282 @@
/* fmodd2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODD2_H___
#define ___SIMD_MATH_FMODD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_vec_utils.h>
/*
 * A vector is returned that contains the remainder of xi/yi,
 * for corresponding elements of vector double x and vector double y,
 * as described below:
 * if yi is 0, the result is 0;
 * otherwise, the function determines the unique signed integer value i
 * such that the returned element is xi - i * yi, with the same sign as xi
 * and magnitude less than |yi|.
 */
static inline vector double
_fmodd2(vector double x, vector double y)
{
int shift0, shift1;
vec_uchar16 swap_words = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
vec_uchar16 propagate = (vec_uchar16){4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192};
vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11};
vec_uchar16 merge = (vec_uchar16){8,9,10,11,12,13,14,15, 24,25,26,27,28,29,30,31};
vec_int4 n, shift, power;
vec_uint4 z;
vec_uint4 x_hi, y_hi;
vec_uint4 abs_x, abs_y;
vec_uint4 exp_x, exp_y;
vec_uint4 zero_x, zero_y;
vec_uint4 mant_x, mant_x0, mant_x1, mant_y ;
vec_uint4 norm, denorm, norm0, norm1, denorm0, denorm1;
vec_uint4 result, result0, resultx, cnt, sign, borrow, mask;
vec_uint4 x_7ff, x_inf, x_nan, y_7ff, y_inf, y_nan, is_normal;
vec_uint4 x_is_norm, y_is_norm, frac_x, frac_y, cnt_x, cnt_y, mant_x_norm, mant_y_norm;
vec_uint4 mant_x_denorm0, mant_x_denorm1, mant_x_denorm;
vec_uint4 mant_y_denorm0, mant_y_denorm1, mant_y_denorm;
vec_uint4 lsb = (vec_uint4)(spu_splats(0x0000000000000001ULL));
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 implied_1 = (vec_uint4)(spu_splats(0x0010000000000000ULL));
vec_uint4 mant_mask = (vec_uint4)(spu_splats(0x000FFFFFFFFFFFFFULL));
sign = spu_and((vec_uint4)x, sign_mask);
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_y = spu_andc((vec_uint4)y, sign_mask);
x_hi = spu_shuffle(abs_x, abs_x, splat_hi);
y_hi = spu_shuffle(abs_y, abs_y, splat_hi);
exp_x = spu_rlmask(x_hi, -20);
exp_y = spu_rlmask(y_hi, -20);
// |y| > |x|
resultx = __vec_gt64(abs_y, abs_x);
// is Inf, is NaN
x_7ff = spu_cmpgt(x_hi, spu_splats((unsigned int)0x7fefffff));
x_inf = __vec_eq64_half(abs_x, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
x_nan = spu_andc(x_7ff, x_inf);
y_7ff = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7fefffff));
y_inf = __vec_eq64_half(abs_y, ((vec_uint4){0x7ff00000,0x0,0x7ff00000,0x0}));
y_nan = spu_andc(y_7ff, y_inf);
// is zero
zero_x = __vec_eq64_half(abs_x, spu_splats((unsigned int)0x0));
zero_y = __vec_eq64_half(abs_y, spu_splats((unsigned int)0x0));
/* Determine ilogb of abs_x and abs_y and
* extract the mantissas (mant_x, mant_y)
*/
/* Classify inputs by form: a zero value tests as !is_normal.
 * Zeros need no special handling here: (x==0, y!=0) matches the y>x
 * case, (x!=0, y==0) matches the y==0 case, and (x==0, y==0) is
 * caught by the zero flags (result0).
 */
x_is_norm = spu_cmpgt(x_hi, spu_splats((unsigned int)0x000fffff));
y_is_norm = spu_cmpgt(y_hi, spu_splats((unsigned int)0x000fffff));
frac_x = spu_and((vec_uint4)x, mant_mask);
frac_y = spu_and((vec_uint4)y, mant_mask);
// count leading zeros (used on the denorm path)
cnt_x = spu_cntlz(frac_x);
cnt_x = spu_add(cnt_x, spu_and(spu_rlqwbyte(cnt_x, 4), spu_cmpeq(cnt_x, 32)));
cnt_x = spu_add(spu_shuffle(cnt_x, cnt_x, splat_hi), -11);
cnt_y = spu_cntlz(frac_y);
cnt_y = spu_add(cnt_y, spu_and(spu_rlqwbyte(cnt_y, 4), spu_cmpeq(cnt_y, 32)));
cnt_y = spu_add(spu_shuffle(cnt_y, cnt_y, splat_hi), -11);
/*
mant_x_norm = spu_andc(spu_sel(implied_1, abs_x, mant_mask), zero_x);
mant_y_norm = spu_andc(spu_sel(implied_1, abs_y, mant_mask), zero_y);
*/
//norm
mant_x_norm = spu_or(implied_1, frac_x);
mant_y_norm = spu_or(implied_1, frac_y);
//denorm
shift0 = spu_extract(cnt_x, 0);
shift1 = spu_extract(cnt_x, 2);
mant_x_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_x, -8);
mant_x_denorm1 = spu_and((vec_uint4)frac_x, ((vec_uint4){0x0,0x0,-1,-1}));
mant_x_denorm0 = spu_slqwbytebc(spu_slqw(mant_x_denorm0, shift0), shift0);
mant_x_denorm1 = spu_slqwbytebc(spu_slqw(mant_x_denorm1, shift1), shift1);
mant_x_denorm = spu_shuffle(mant_x_denorm0, mant_x_denorm1, merge);
// vec_int4 shift_y = (vec_int4)spu_sub(cnt_y, spu_splats((unsigned int)11));
shift0 = spu_extract(cnt_y, 0);
shift1 = spu_extract(cnt_y, 2);
mant_y_denorm0 = spu_rlmaskqwbyte((vec_uint4)frac_y, -8);
mant_y_denorm1 = spu_and((vec_uint4)frac_y, ((vec_uint4){0x0,0x0,-1,-1}));
mant_y_denorm0 = spu_slqwbytebc(spu_slqw(mant_y_denorm0, shift0), shift0);
mant_y_denorm1 = spu_slqwbytebc(spu_slqw(mant_y_denorm1, shift1), shift1);
mant_y_denorm = spu_shuffle(mant_y_denorm0, mant_y_denorm1, merge);
// mant_x, mant_y( norm | denorm )
mant_x = spu_sel(mant_x_denorm, mant_x_norm, x_is_norm);
mant_y = spu_sel(mant_y_denorm, mant_y_norm, y_is_norm);
/* power
*/
vec_int4 power_x_norm = (vec_int4)exp_x;
vec_int4 power_x_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_x);
vec_int4 power_x = spu_sel(power_x_denorm, power_x_norm, x_is_norm);
vec_int4 power_y_norm = (vec_int4)exp_y;
vec_int4 power_y_denorm = spu_sub(spu_splats((int)1), (vec_int4)cnt_y);
vec_int4 power_y = spu_sel(power_y_denorm, power_y_norm, y_is_norm);
/* Compute fixed point fmod of mant_x and mant_y. Set the flag,
* result0, to all ones if we detect that the final result is
* ever 0.
*/
result0 = spu_or(zero_x, zero_y);
// n = spu_sub((vec_int4)logb_x, (vec_int4)logb_y); //zhao--
n = spu_sub(power_x, power_y);
mask = spu_cmpgt(n, 0);
while (spu_extract(spu_gather(mask), 0)) {
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
result0 = spu_or(spu_and(spu_cmpeq(spu_or(z, spu_shuffle(z, z, swap_words)), 0), mask), result0);
mant_x = spu_sel(mant_x,
spu_sel(spu_slqw(mant_x, 1), spu_andc(spu_slqw(z, 1), lsb), spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1)),
mask);
n = spu_add(n, -1);
mask = spu_cmpgt(n, 0);
}
borrow = spu_genb(mant_x, mant_y);
borrow = spu_shuffle(borrow, borrow, propagate);
z = spu_subx(mant_x, mant_y, borrow);
mant_x = spu_sel(mant_x, z, spu_cmpgt((vec_int4)spu_shuffle(z, z, splat_hi), -1));
result0 = spu_or(spu_cmpeq(spu_or(mant_x, spu_shuffle(mant_x, mant_x, swap_words)), 0), result0);
/* Convert the result back to floating point and restore
 * the sign. If we flagged the result to be zero (result0),
 * zero it. If we flagged the result to equal its input x
 * (resultx), then return x.
 *
 * Double precision can produce a denormal output.
 */
// normal = spu_cmpgt((vec_int4)exp_y, 0);//zhao--
cnt = spu_cntlz(mant_x);
cnt = spu_add(cnt, spu_and(spu_rlqwbyte(cnt, 4), spu_cmpeq(cnt, 32)));
cnt = spu_add(spu_shuffle(cnt, cnt, splat_hi), -11);
mant_x0 = spu_rlmaskqwbyte(mant_x, -8);
mant_x1 = spu_and(mant_x,((vec_uint4){0x0,0x0,-1,-1}));
power =spu_sub(power_y, (vec_int4)cnt);
is_normal = spu_cmpgt(power, 0);
//norm
shift0 = spu_extract(cnt, 0);
shift1 = spu_extract(cnt, 2);
/*
norm0 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x0, implied_1), shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(spu_andc(mant_x1, implied_1), shift1), shift1);
*/
norm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
norm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
norm = spu_shuffle(norm0, norm1, merge);
//denorm
/*
shift = spu_add((vec_int4)exp_y, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_slqwbytebc(spu_slqw(mant_x0, shift0), shift0);
denorm1 = spu_slqwbytebc(spu_slqw(mant_x1, shift1), shift1);
*/
shift = spu_add(power, -1);
shift0 = spu_extract(shift, 0);
shift1 = spu_extract(shift, 2);
denorm0 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm0, shift0), 7+shift0);
denorm1 = spu_rlmaskqwbytebc(spu_rlmaskqw(norm1, shift1), 7+shift1);
denorm = spu_shuffle(denorm0, denorm1, merge);
// merge
mant_x = spu_sel(denorm, norm, is_normal);
exp_y = (vec_uint4)power;
exp_y = spu_and(spu_rl(exp_y, 20), is_normal);
result = spu_sel(exp_y, spu_or(sign, mant_x),((vec_uint4){0x800FFFFF, -1, 0x800FFFFF, -1}));
// select: pass x through when |y| > |x|, otherwise keep the computed remainder
result = spu_sel(spu_andc(result, spu_rlmask(result0, -1)),
(vec_uint4)x, resultx);
//y=+-inf => x
result = spu_sel(result, (vec_uint4)x, y_inf);
//x=+-inf => NaN
result = spu_sel(result, ((vec_uint4){0x7ff80000, 0x0, 0x7ff80000, 0x0}), x_inf);
//y=0 => 0
result = spu_andc(result, zero_y);
//x=NaN or y=NaN => that NaN is propagated
result = spu_sel(result, (vec_uint4)x, x_nan);
result = spu_sel(result, (vec_uint4)y, y_nan);
return ((vec_double2)result);
}
#endif
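Given the function's size, a short smoke test of the documented semantics may help; illustrative only (fmod_demo is a made-up name, and the expected values follow the header comment):

#include <math.h>
#include <spu_intrinsics.h>

static void fmod_demo(void)
{
  /* Result keeps the sign of x, magnitude below |y|: { 1.5, -1.5 } */
  vec_double2 r1 = _fmodd2((vec_double2){ 5.5, -5.5 },
                           (vec_double2){ 2.0,  2.0 });
  /* y == 0 yields 0 (per the comment above), y == Inf returns x: { 0.0, 1.0 } */
  vec_double2 r2 = _fmodd2((vec_double2){ 1.0,  1.0 },
                           (vec_double2){ 0.0,  INFINITY });
  (void)r1; (void)r2;
}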

View File

@@ -0,0 +1,94 @@
/* fmodf4 - for each of four float slots, compute remainder of x/y defined as x - truncated_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODF4_H___
#define ___SIMD_MATH_FMODF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
static inline vector float
_fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = _divf4( x, y );
xabs = _fabsf4( x );
yabs = _fabsf4( y );
qabs = _fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
r1 = spu_nmsub( i1, yabs, xabs2 );
r2 = spu_nmsub( i2, yabs, xabs2 );
i = i0;
i = spu_sel( i1, i, spu_cmpgt( xabs, r1 ) );
i = spu_sel( i2, i, spu_cmpgt( xabs, r2 ) );
i = _copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
#endif
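The candidate-selection logic reads more easily in scalar form. A hedged model (fmodf_ref is an illustrative name; truncf and copysignf stand in for the vector converts and _copysignf4):

#include <math.h>

/* Illustrative scalar shape of the correction above: the computed
   quotient may be off by one, so keep the largest candidate i with
   2|x| - i*|y| >= |x|, then form x - i*y (a fused nmsub in the
   vector code). */
static float fmodf_ref(float x, float y)
{
  float q  = x / y;
  float i  = truncf(fabsf(q));
  float xa = fabsf(x), ya = fabsf(y);
  if (2.0f * xa - (i + 1.0f) * ya >= xa) i += 1.0f;  /* i+1 still fits */
  else if (2.0f * xa - i * ya < xa)      i -= 1.0f;  /* i was too big  */
  return x - copysignf(i, q) * y;
}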

View File

@@ -0,0 +1,83 @@
/* fpclassifyd2 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYD2_H___
#define ___SIMD_MATH_FPCLASSIFYD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector signed long long
_fpclassifyd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 signexpn = spu_splats(0xfff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 mask;
vec_llong2 classtype;
vec_uint4 cmpgt, cmpeq;
//FP_NORMAL: normal unless nan, infinity, zero, or denorm
classtype = spu_splats((long long)FP_NORMAL);
//FP_NAN: all-ones exponent and non-zero mantissa
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)signexpn );
mask = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_NAN), mask );
//FP_INFINITE: all-ones exponent and zero mantissa
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_INFINITE), mask );
//FP_ZERO: zero exponent and zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_ZERO), mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
cmpeq = spu_cmpeq( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)zero );
cmpgt = spu_cmpgt( (vec_uint4)spu_andc( (vec_ullong2)x, signexpn ), (vec_uint4)zero );
mask = (vec_ullong2)spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_or( cmpgt, spu_shuffle( cmpgt, cmpgt, swapEvenOdd ) ) );
classtype = spu_sel( classtype, spu_splats((long long)FP_SUBNORMAL), mask );
return classtype;
}
#endif
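A hedged usage sketch (classify_demo is a made-up name; the FP_* values come from <math.h>, as in the code above):

#include <math.h>
#include <spu_intrinsics.h>

static void classify_demo(void)
{
  vec_double2 v = (vec_double2){ 5e-324, 1.0 };  /* min denormal, a normal */
  vector signed long long c = _fpclassifyd2(v);
  /* slot 0 should classify as FP_SUBNORMAL, slot 1 as FP_NORMAL */
  (void)c;
}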

View File

@@ -0,0 +1,67 @@
/* fpclassifyf4 - for each element of vector x, return classification of x': FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FPCLASSIFYF4_H___
#define ___SIMD_MATH_FPCLASSIFYF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector signed int
_fpclassifyf4 (vector float x)
{
vec_uint4 zero = spu_splats((unsigned int)0x00000000);
vec_uint4 mask;
vec_uint4 unclassified = spu_splats((unsigned int)0xffffffff);
vec_int4 classtype = (vec_int4)zero;
//FP_NAN: NaN not supported on SPU, never return FP_NAN
//FP_INFINITE: Inf not supported on SPU, never return FP_INFINITE
//FP_ZERO: zero exponent and zero mantissa
mask = spu_cmpeq( spu_andc( (vec_uint4)x, spu_splats((unsigned int)0x80000000)), zero );
classtype = spu_sel( classtype, spu_splats((int)FP_ZERO), mask );
unclassified = spu_andc( unclassified, mask );
//FP_SUBNORMAL: zero exponent and non-zero mantissa
mask = spu_and( spu_cmpeq( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000)), zero ),
spu_cmpgt( spu_and( (vec_uint4)x, spu_splats((unsigned int)0x007fffff)), zero ) );
classtype = spu_sel( classtype, spu_splats((int)FP_SUBNORMAL), mask );
unclassified = spu_andc( unclassified, mask );
//FP_NORMAL: none of the above
classtype = spu_sel( classtype, spu_splats((int)FP_NORMAL), unclassified );
return classtype;
}
#endif

View File

@@ -0,0 +1,98 @@
/* frexpd2 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPD2_H___
#define ___SIMD_MATH_FREXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
#define __FREXPD_DBL_NAN 0x7FF8000000000000ull
static inline vector double
_frexpd2 (vector double x, vector signed long long *pexp)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 maskdw = (vec_ullong2){0xffffffffffffffffull, 0ull};
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_ullong2 e0, x0, x1;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, sh, ex;
vec_double2 fr, frac = (vec_double2)zero;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( cmpgt, spu_and( cmpeq, spu_rlqwbyte( cmpgt, -4 ) ) );
isnan = (vec_ullong2)spu_shuffle( isnan, isnan, even );
frac = spu_sel( frac, (vec_double2)spu_splats(__FREXPD_DBL_NAN), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
frac = spu_sel( frac, x , isinf );
//x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
frac = spu_sel( frac, (vec_double2)zero , iszero );
*pexp = spu_sel( *pexp, (vec_llong2)zero , iszero );
//Integer Exponent: if x is normal or subnormal
//...shift left to normalize fraction, zero shift if normal
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
sh = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)11) ), spu_cmpgt( lz0, (int)11 ) );
sh = spu_sel( sh, spu_add( sh, lz ), spu_cmpeq( lz0, (int)32 ) );
x0 = spu_slqw( spu_slqwbytebc( spu_and( (vec_ullong2)x, maskdw ), spu_extract(sh, 1) ), spu_extract(sh, 1) );
x1 = spu_slqw( spu_slqwbytebc( (vec_ullong2)x, spu_extract(sh, 3) ), spu_extract(sh, 3) );
fr = (vec_double2)spu_sel( x1, x0, maskdw );
fr = spu_sel( fr, (vec_double2)spu_splats(0x3FE0000000000000ull), expn );
fr = spu_sel( fr, x, sign );
e0 = spu_rlmaskqw( spu_rlmaskqwbyte(spu_and( (vec_ullong2)x, expn ),-6), -4 );
ex = spu_sel( spu_sub( (vec_int4)e0, spu_splats((int)1022) ), spu_sub( spu_splats((int)-1021), sh ), spu_cmpgt( sh, (int)0 ) );
frac = spu_sel( frac, fr, spu_nor( isnan, spu_or( isinf, iszero ) ) );
*pexp = spu_sel( *pexp, spu_extend( ex ), spu_nor( isnan, spu_or( isinf, iszero ) ) );
return frac;
}
#endif

View File

@@ -0,0 +1,52 @@
/* frexpf4 - for each element of vector x, return the normalized fraction and store the exponent of x'
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPF4_H___
#define ___SIMD_MATH_FREXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_frexpf4 (vector float x, vector signed int *pexp)
{
vec_int4 zeros = spu_splats((int)0);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_sub(spu_rlmask(e1,-23), spu_splats((int)126));
*pexp = spu_sel(e2, zeros, zeromask);
vec_float4 m2 = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
return spu_sel(m2, (vec_float4)zeros, zeromask);
}
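/* Usage sketch (added commentary, not part of the original library):
nonzero lanes yield a fraction in [0.5, 1); zero lanes yield fraction 0
and exponent 0. */
static inline void
_frexpf4_example (void)
{
vec_int4 e;
vec_float4 f = _frexpf4( (vec_float4){ 8.0f, 0.15625f, 1.0f, 0.0f }, &e );
/* f = { 0.5, 0.625, 0.5, 0.0 }, e = { 4, -2, 1, 0 } */
(void)f;
}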
#endif

View File

@@ -0,0 +1,47 @@
/* hypotd2 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTD2_H___
#define ___SIMD_MATH_HYPOTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtd2.h>
static inline vector double
_hypotd2 (vector double x, vector double y)
{
vec_double2 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return _sqrtd2(sum);
}
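/* Usage sketch (added commentary, not part of the original library):
classic 3-4-5 and 5-12-13 right triangles. */
static inline vec_double2
_hypotd2_example (void)
{
return _hypotd2( (vec_double2){ 3.0, 5.0 },
(vec_double2){ 4.0, 12.0 } ); /* { 5.0, 13.0 } */
}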
#endif

View File

@@ -0,0 +1,47 @@
/* hypotf4 - for each element of vector x and y, return the square root of (x')^2 + (y')^2
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTF4_H___
#define ___SIMD_MATH_HYPOTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/sqrtf4.h>
static inline vector float
_hypotf4 (vector float x, vector float y)
{
vec_float4 sum = spu_mul(x,x);
sum = spu_madd(y,y,sum);
return _sqrtf4(sum);
}
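/* Note (added commentary): like _hypotd2, this squares the inputs before
taking the square root, so lanes with magnitudes near the float range
limits can lose accuracy or saturate in the intermediate x*x + y*y; a
fully robust hypot would rescale the larger operand first. */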
#endif

View File

@@ -0,0 +1,83 @@
/* ilogbd2 - for each element of vector x, return integer exponent of normalized double x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBD2_H___
#define ___SIMD_MATH_ILOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
static inline vector signed long long
_ilogbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, iszeroinf;
vec_llong2 ilogb = (vec_llong2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//FP_ILOGBNAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGBNAN), isnan );
//FP_ILOGB0: x is zero (zero exponent and zero mantissa) or infinity (all-ones exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszeroinf = (vec_ullong2)spu_or( spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) ),
spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) ) );
ilogb = spu_sel( ilogb, spu_splats((long long)FP_ILOGB0), iszeroinf );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmaskqw( spu_rlmaskqwbyte(e1,-6), -4 );
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
ilogb = spu_sel( ilogb, spu_extend( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023)), spu_add( lz0, lz1 ) ) ), spu_nor( isnan, iszeroinf ) );
return ilogb;
}
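/* Usage sketch (added commentary, not part of the original library):
returns the unbiased exponent per lane; 1024.0 = 2^10 and 0.03125 = 2^-5. */
static inline vec_llong2
_ilogbd2_example (void)
{
return _ilogbd2( (vec_double2){ 1024.0, 0.03125 } ); /* { 10, -5 } */
}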
#endif

View File

@@ -0,0 +1,50 @@
/* ilogbf4 - for each element of vector x, return integer exponent of x', FP_ILOGBNAN, or FP_ILOGB0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBF4_H___
#define ___SIMD_MATH_ILOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <limits.h>
#include <math.h>
static inline vector signed int
_ilogbf4 (vector float x)
{
vec_int4 minus127 = spu_splats((int)-127);
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uint4 zeromask = spu_cmpeq(e1, 0);
vec_int4 e2 = spu_add(spu_rlmask(e1,-23), minus127);
return spu_sel(e2, (vec_int4)spu_splats(FP_ILOGB0), zeromask);
}
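/* Usage sketch (added commentary, not part of the original library): lanes
with a zero exponent field (zero, and denormals, which SPU floats flush)
yield FP_ILOGB0. */
static inline vec_int4
_ilogbf4_example (void)
{
return _ilogbf4( (vec_float4){ 1.0f, 1024.0f, 0.5f, 0.0f } );
/* { 0, 10, -1, FP_ILOGB0 } */
}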
#endif

View File

@@ -0,0 +1,45 @@
/* irintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode.
On SPU, the rounding mode for float is always towards zero.
A vector signed int is returned.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IRINTF4_H___
#define ___SIMD_MATH_IRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed int
_irintf4(vector float in)
{
return spu_convts(in,0);
}
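/* Usage sketch (added commentary, not part of the original library): because
the SPU float rounding mode is truncation, spu_convts rounds toward zero. */
static inline vec_int4
_irintf4_example (void)
{
return _irintf4( (vec_float4){ 1.7f, -1.7f, 2.5f, -0.9f } );
/* { 1, -1, 2, 0 } */
}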
#endif

View File

@@ -0,0 +1,61 @@
/* iroundf4 - for each of four float slots, round to the nearest integer,
halfway cases are rounded away from zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IROUNDF4_H___
#define ___SIMD_MATH_IROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed int
_iroundf4(vector float in)
{
vec_int4 exp, out;
vec_uint4 addend;
/* Add 0.5 (as a fixed-point addend to the magnitude) so that halfway
cases round away from zero.
*/
exp = spu_sub(125, spu_and(spu_rlmask((vec_int4)in, -23), 0xFF));
addend = spu_and(spu_rlmask( spu_splats((unsigned int)0x1000000), exp),
spu_cmpgt((vec_uint4)exp, -31));
in = (vec_float4)spu_add((vec_uint4)in, addend);
/* Truncate the result.
*/
out = spu_convts(in,0);
return (out);
}
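/* Usage sketch (added commentary, not part of the original library): the
fixed-point addend injects exactly 0.5 into each lane's magnitude, so
halfway cases move away from zero before the truncating convert. */
static inline vec_int4
_iroundf4_example (void)
{
return _iroundf4( (vec_float4){ 1.5f, -1.5f, 2.4f, -2.5f } );
/* { 2, -2, 2, -3 } */
}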
#endif

View File

@@ -0,0 +1,51 @@
/* is0denormd2 - for each of two double slots, if input equals 0 or denorm return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMD2_H___
#define ___SIMD_MATH_IS0DENORMD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_is0denormd2 (vector double x)
{
vec_double2 xexp;
vec_ullong2 cmp;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
xexp = (vec_double2)spu_and( (vec_ullong2)x, spu_splats(0x7ff0000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xexp, (vec_uint4)spu_splats(0) );
cmp = spu_shuffle( cmp, cmp, even );
return cmp;
}
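/* Usage sketch (added commentary, not part of the original library): a lane
is flagged exactly when its exponent field is zero, i.e. the value is +/-0
or denormal. */
static inline vec_ullong2
_is0denormd2_example (void)
{
return _is0denormd2( (vec_double2){ 0.0, 1.0 } ); /* { all ones, 0 } */
}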
#endif

View File

@@ -0,0 +1,42 @@
/* is0denormf4 - for each element of vector x, return a mask of ones if x' is zero or denorm, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_IS0DENORMF4_H___
#define ___SIMD_MATH_IS0DENORMF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_is0denormf4 (vector float x)
{
return spu_cmpeq( (vec_uint4)spu_and( (vec_uint4)x, spu_splats((unsigned int)0x7f800000) ), (vec_uint4)spu_splats(0x00000000) );
}
#endif

View File

@@ -0,0 +1,61 @@
/* isequald2 - for each of two double slots, if x = y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALD2_H___
#define ___SIMD_MATH_ISEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( spu_or( (vec_ullong2)spu_and( cmpeq_i_even, cmpeq_i_odd), bothzero),
spu_or( _isnand2( x ), _isnand2( y ) ) );
}
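/* Usage sketch (added commentary, not part of the original library): +0 and
-0 compare equal and NaN lanes never do, matching C's == semantics; assumes
a GCC-style __builtin_nan as provided by spu-gcc. */
static inline vec_ullong2
_isequald2_example (void)
{
vec_double2 x = (vec_double2){ 0.0, __builtin_nan("") };
vec_double2 y = (vec_double2){ -0.0, __builtin_nan("") };
return _isequald2( x, y ); /* { all ones, 0 } */
}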
#endif

View File

@@ -0,0 +1,42 @@
/* isequalf4 - for each element of vector x and y, return a mask of ones if x' is equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISEQUALF4_H___
#define ___SIMD_MATH_ISEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isequalf4 (vector float x, vector float y)
{
return spu_cmpeq(x, y);
}
#endif

View File

@@ -0,0 +1,51 @@
/* isfinited2 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITED2_H___
#define ___SIMD_MATH_ISFINITED2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_isfinited2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Finite unless NaN or Inf, check for 'not all-ones exponent'
cmpr = (vec_ullong2)spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
#endif

View File

@@ -0,0 +1,45 @@
/* isfinitef4 - for each element of vector x, return a mask of ones if x' is finite, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISFINITEF4_H___
#define ___SIMD_MATH_ISFINITEF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isfinitef4 (vector float x)
{
(void)x;
// NaN, INF not supported on SPU, result always a mask of ones
return spu_splats((unsigned int)0xffffffff);
}
#endif

View File

@@ -0,0 +1,71 @@
/* isgreaterd2 - for each of two double slots, if x > y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERD2_H___
#define ___SIMD_MATH_ISGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
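/* Added commentary: the SPU has no 64-bit compare, so the doubleword result
below is assembled from 32-bit word compares: a signed compare on the high
(even) words, plus an unsigned compare on the low (odd) words when the high
words are equal. Because IEEE doubles are sign-magnitude, the ordering is
flipped when both operands are negative, and lanes where both values are
zero or either is NaN are forced to false. */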
static inline vector unsigned long long
_isgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif

View File

@@ -0,0 +1,73 @@
/* isgreaterequald2 - for each of two double slots, if x is greater or equal to y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALD2_H___
#define ___SIMD_MATH_ISGREATEREQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_isgreaterequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel ( cmpgt_ll, cmplt_ll, bothneg ), cmpeq_ll ),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -0,0 +1,46 @@
/* isgreaterequalf4 - for each element of vector x and y, return a mask of ones if x' is greater than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATEREQUALF4_H___
#define ___SIMD_MATH_ISGREATEREQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isgreaterequalf4 (vector float x, vector float y)
{
vec_uint4 var;
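/* x >= y is the complement of y > x; spu_nor(a, a) computes bitwise NOT.
This is safe on SPU single precision, which has no NaN lanes to exclude. */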
var = spu_cmpgt(y, x);
return spu_nor(var, var);
}
#endif

View File

@@ -0,0 +1,42 @@
/* isgreaterf4 - for each element of vector x and y, return a mask of ones if x' is greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISGREATERF4_H___
#define ___SIMD_MATH_ISGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isgreaterf4 (vector float x, vector float y)
{
return spu_cmpgt(x, y);
}
#endif

View File

@@ -0,0 +1,51 @@
/* isinfd2 - for each of two double slots, if input equals +Inf or -Inf return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFD2_H___
#define ___SIMD_MATH_ISINFD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_isinfd2 (vector double x)
{
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_double2 xabs;
vec_ullong2 cmp;
xabs = (vec_double2)spu_andc( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmp = (vec_ullong2)spu_cmpeq( (vec_uint4)xabs, (vec_uint4)spu_splats(0x7ff0000000000000ull) );
cmp = spu_and( cmp, spu_shuffle( cmp, cmp, swapEvenOdd ) );
return cmp;
}
#endif

View File

@@ -0,0 +1,45 @@
/* isinff4 - for each element of vector x, return a mask of ones if x' is INF, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISINFF4_H___
#define ___SIMD_MATH_ISINFF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isinff4 (vector float x)
{
(void)x;
// INF not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -0,0 +1,71 @@
/* islessd2 - for each of two double slots, if x < y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSD2_H___
#define ___SIMD_MATH_ISLESSD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)y, (vec_int4)x );
cmpeq_i = spu_cmpeq( (vec_int4)y, (vec_int4)x );
cmpgt_ui = spu_cmpgt( (vec_uint4)y, (vec_uint4)x );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_sel( cmpgt_ll, cmplt_ll, bothneg ),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif

View File

@@ -0,0 +1,73 @@
/* islessequald2 - for each of two double slots, if x <= y return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALD2_H___
#define ___SIMD_MATH_ISLESSEQUALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessequald2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpgt_i, cmpgt_ui, cmpeq_i, cmpeq_i_even;
vec_ullong2 cmpgt_ll, cmplt_ll, cmpeq_ll;
vec_ullong2 bothneg, bothzero;
cmpgt_i = spu_cmpgt( (vec_int4)x, (vec_int4)y );
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpgt_ui = spu_cmpgt( (vec_uint4)x, (vec_uint4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpgt_ll = (vec_ullong2)spu_or( spu_shuffle( cmpgt_i, cmpgt_i, even ),
spu_and( cmpeq_i_even, spu_shuffle( cmpgt_ui, cmpgt_ui, odd ) ) );
cmpeq_ll = (vec_ullong2)spu_and( cmpeq_i_even, spu_shuffle( cmpeq_i, cmpeq_i, odd ) );
cmplt_ll = spu_nor( cmpeq_ll, cmpgt_ll );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
cmpeq_ll = spu_or( cmpeq_ll, bothzero);
bothneg = spu_and( (vec_ullong2)x, (vec_ullong2)y );
bothneg = (vec_ullong2)spu_cmpgt( spu_splats(0), (vec_int4)bothneg );
bothneg = spu_shuffle( bothneg, bothneg, even );
return spu_andc( spu_or( spu_sel( cmplt_ll, cmpgt_ll, bothneg ), cmpeq_ll),
spu_or( _isnand2 ( x ), _isnand2 ( y ) ) );
}
#endif

View File

@@ -0,0 +1,46 @@
/* islessequalf4 - for each element of vector x and y, return a mask of ones if x' is less than or equal to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSEQUALF4_H___
#define ___SIMD_MATH_ISLESSEQUALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_islessequalf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpgt(x, y);
return spu_nor(var, var);
}
#endif

View File

@@ -0,0 +1,42 @@
/* islessf4 - for each element of vector x and y, return a mask of ones if x' is less than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSF4_H___
#define ___SIMD_MATH_ISLESSF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_islessf4 (vector float x, vector float y)
{
return spu_cmpgt(y, x);
}
#endif

View File

@@ -0,0 +1,61 @@
/* islessgreaterd2 - for each of two double slots, if x is less or greater than y return a mask of ones, else zero
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERD2_H___
#define ___SIMD_MATH_ISLESSGREATERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isnand2.h>
static inline vector unsigned long long
_islessgreaterd2 (vector double x, vector double y)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_uint4 cmpeq_i, cmpeq_i_even, cmpeq_i_odd;
vec_ullong2 bothzero;
cmpeq_i = spu_cmpeq( (vec_int4)x, (vec_int4)y );
cmpeq_i_even = spu_shuffle( cmpeq_i, cmpeq_i, even );
cmpeq_i_odd = spu_shuffle( cmpeq_i, cmpeq_i, odd );
bothzero = spu_andc( spu_or( (vec_ullong2)x, (vec_ullong2)y ), sign );
bothzero = (vec_ullong2)spu_cmpeq( (vec_uint4)bothzero, 0U );
bothzero = spu_and( bothzero, spu_shuffle( bothzero, bothzero, swapEvenOdd ) );
return spu_andc( (vec_ullong2)spu_nand( cmpeq_i_even, cmpeq_i_odd),
spu_or( bothzero, spu_or( _isnand2 ( x ), _isnand2 ( y ) ) ) );
}
#endif
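Note: the logic above excludes NaNs and treats +0 and -0 as equal, so each slot behaves like C99 islessgreater(). A minimal scalar model of one slot (the helper name is illustrative, not part of the library):

#include <math.h>
#include <stdint.h>

/* One double slot: all-ones mask when x and y compare less or greater,
 * zero when equal (including +0 == -0) or when either input is a NaN. */
static uint64_t islessgreater_slot(double x, double y)
{
  return islessgreater(x, y) ? ~0ull : 0ull;
}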

View File

@@ -0,0 +1,46 @@
/* islessgreaterf4 - for each element of vector x and y, return a mask of ones if x' is less than or greater than y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISLESSGREATERF4_H___
#define ___SIMD_MATH_ISLESSGREATERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_islessgreaterf4 (vector float x, vector float y)
{
vec_uint4 var;
var = spu_cmpeq(x, y);
return spu_nor(var, var);
}
#endif
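Note: because SPU single precision has no NaNs, less-or-greater collapses to not-equal, which is all the nor-of-cmpeq above computes. A scalar model of one slot (helper name illustrative):

#include <stdint.h>

/* One float slot: with no NaNs representable, x <> y is simply x != y. */
static uint32_t islessgreaterf_slot(float x, float y)
{
  return (x != y) ? 0xffffffffu : 0u;
}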

View File

@@ -0,0 +1,56 @@
/* isnand2 - for each of two double slots, if input is any type of NaN return mask of ones, else 0
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNAND2_H___
#define ___SIMD_MATH_ISNAND2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_isnand2 (vector double x)
{
vec_double2 xneg;
vec_ullong2 cmpgt, cmpeq, cmpnan;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uint4 expmask = (vec_uint4)spu_splats(0xfff0000000000000ull);
xneg = (vec_double2)spu_or( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)xneg, expmask );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)xneg, expmask );
cmpnan = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return cmpnan;
}
#endif
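Note: the shuffles above build a 64-bit unsigned compare out of 32-bit spu_cmpgt/spu_cmpeq results; the underlying bit test is that a double is NaN exactly when, with the sign bit forced on, its pattern exceeds 0xFFF0000000000000. A scalar model (helper name illustrative):

#include <stdint.h>
#include <string.h>

/* One double slot: set the sign bit, then any pattern strictly above
 * 0xFFF0000000000000 has an all-ones exponent and a non-zero mantissa,
 * i.e. is a NaN. */
static uint64_t isnand_slot(double x)
{
  uint64_t bits;
  memcpy(&bits, &x, sizeof bits);
  bits |= 0x8000000000000000ull;
  return (bits > 0xfff0000000000000ull) ? ~0ull : 0ull;
}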

View File

@@ -0,0 +1,45 @@
/* isnanf4 - for each element of vector x, return a mask of ones if x' is NaN, zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNANF4_H___
#define ___SIMD_MATH_ISNANF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isnanf4 (vector float x)
{
(void)x;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -0,0 +1,53 @@
/* isnormald2 - for each element of vector x, return a mask of ones if x' is normal (not zero, denormal, NaN, or INF), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALD2_H___
#define ___SIMD_MATH_ISNORMALD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_isnormald2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 cmpr;
//Normal unless nan, infinite, denorm, or zero
//Check for 'not zero or all-ones exponent'
cmpr = (vec_ullong2)spu_and( spu_cmpgt( (vec_uint4)spu_and( (vec_ullong2)x, expn ), (vec_uint4)spu_splats(0x0000000000000000ull) ),
spu_cmpgt( (vec_uint4)expn, (vec_uint4)spu_and( (vec_ullong2)x, expn ) ) );
cmpr = spu_shuffle( cmpr, cmpr, even);
return cmpr;
}
#endif
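Note: the compare pair above keeps only slots whose exponent field is neither zero nor all ones. A scalar model (helper name illustrative):

#include <stdint.h>
#include <string.h>

/* One double slot: normal when the biased exponent is neither 0
 * (zero/denormal) nor 0x7FF (infinity/NaN). */
static uint64_t isnormald_slot(double x)
{
  uint64_t bits, expo;
  memcpy(&bits, &x, sizeof bits);
  expo = bits & 0x7ff0000000000000ull;
  return (expo != 0 && expo != 0x7ff0000000000000ull) ? ~0ull : 0ull;
}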

View File

@@ -0,0 +1,43 @@
/* isnormalf4 - for each element of vector x, return a mask of ones if x' is normal (not zero, denormal, NaN, or INF), zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISNORMALF4_H___
#define ___SIMD_MATH_ISNORMALF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isnormalf4 (vector float x)
{
// NaN, INF not supported on SPU; normal unless zero
return spu_cmpabsgt(x, (vector float)spu_splats(0x00000000));
}
#endif

View File

@@ -0,0 +1,67 @@
/* isunorderedd2 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDD2_H___
#define ___SIMD_MATH_ISUNORDEREDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned long long
_isunorderedd2 (vector double x, vector double y)
{
vec_double2 neg;
vec_ullong2 cmpgt, cmpeq, cmpnanx, cmpnany;
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_ullong2 expn = (vec_ullong2)spu_splats(0xfff0000000000000ull);
vec_ullong2 sign = (vec_ullong2)spu_splats(0x8000000000000000ull);
//Check if x is nan
neg = (vec_double2)spu_or( (vec_ullong2)x, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnanx = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
//Check if y is nan
neg = (vec_double2)spu_or( (vec_ullong2)y, sign );
cmpgt = (vec_ullong2)spu_cmpgt( (vec_uint4)neg, (vec_uint4)expn );
cmpeq = (vec_ullong2)spu_cmpeq( (vec_uint4)neg, (vec_uint4)expn );
cmpnany = spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
return spu_or( cmpnanx, cmpnany );
}
#endif
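Note: both halves repeat the NaN test from isnand2, so each slot reduces to C99 isunordered(). Scalar model (helper name illustrative):

#include <math.h>
#include <stdint.h>

/* One double slot: unordered exactly when at least one operand is a NaN. */
static uint64_t isunorderedd_slot(double x, double y)
{
  return isunordered(x, y) ? ~0ull : 0ull;
}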

View File

@@ -0,0 +1,46 @@
/* isunorderedf4 - for each element of vector x and y, return a mask of ones if x' is unordered to y', zero otherwise
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ISUNORDEREDF4_H___
#define ___SIMD_MATH_ISUNORDEREDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector unsigned int
_isunorderedf4 (vector float x, vector float y)
{
(void)x;
(void)y;
// NaN not supported on SPU, result always zero
return spu_splats((unsigned int)0x00000000);
}
#endif

View File

@@ -0,0 +1,266 @@
/* ldexpd2 - Multiply Double by 2 Raised to its Power
   For large elements of ex (overflow), returns DBL_MAX with the input's sign
For small elements of ex (underflow), returns 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPD2_H___
#define ___SIMD_MATH_LDEXPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_ldexpd2(vector double x, vector signed long long ex)
{
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2099);
// vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats( 2098);
// vec_int4 max = spu_splats( 2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = ((vec_uint4){20, 32, 20, 32});
vec_double2 f1, f2;
vec_double2 out;
vec_double2 in = x;
vec_int4 exp_in;
// check input data range
vec_int4 exp0 = spu_shuffle( (vec_int4)ex, (vec_int4)ex, ((vec_uchar16){4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15}));
vec_int4 dmy = spu_shuffle( (vec_int4)spu_splats(0x10000), (vec_int4)ex, ((vec_uchar16){16,1,2,3, 16,1,2,3, 24,1,2,3,24,1,2,3}));
// (-)0xFFFFFFFF80000000 or (+)0x000000007FFFFFFF
vec_int4 msk_range = ((vec_int4){0,0x80000000, 0,0x80000000});
vec_int4 inrange = spu_addx( (vec_int4)ex, msk_range, spu_rlqwbyte(spu_genc((vec_int4)ex, msk_range), 4));
inrange = (vec_int4)spu_cmpeq( inrange, 0 );
inrange = spu_shuffle(inrange,inrange,((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// select the dummy value for out-of-range data, otherwise the input
vec_int4 exp = spu_sel( dmy, exp0, (vec_uint4)inrange);
exp_in = exp;
/* Clamp the specified exponent to the range -2099 to 2098.
*/
cmp_min = spu_cmpgt(exp, min);
cmp_max = spu_cmpgt(exp, max);
exp = spu_sel(min, exp, cmp_min);
exp = spu_sel(exp, max, cmp_max);
/* Generate the factors f1 = 2^e1 and f2 = 2^e2
*/
e1 = spu_rlmaska(exp, -1);
e2 = spu_sub(exp, e1);
f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift);
vec_double2 otmp = spu_mul(x, f1);
vec_uint4 fpscr1 = spu_mffpscr();
f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift);
out = spu_mul(otmp, f2);
vec_uint4 fpscr2 = spu_mffpscr();
/* Compute the product x * 2^e1 * 2^e2
*/
// out = spu_mul(spu_mul(x, f1), f2);
// check floating point register DENORM bit
vec_uint4 fpscr0, fpscr;
fpscr0 = spu_or(fpscr1, fpscr2);
fpscr = spu_shuffle(fpscr0, fpscr0, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,10,0x80,0x80,0x80,6,0x80,0x80,0x80,0x80,0x80}));
fpscr = spu_or(fpscr0, fpscr);
if ( __builtin_expect(spu_extract(fpscr, 1) == 0, 1) ) return out;
//////////////////////
// Denormalized calc//
//////////////////////
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_uint4 signmask = ((vec_uint4){0x80000000,0,0x80000000,0});
vec_int4 zeros = spu_splats(0);
vec_uchar16 msk_64_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
//check input was zero
vec_uint4 x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 x_zero = spu_cmpeq( x_body, (vec_uint4)zeros );
x_zero = spu_and( x_zero, spu_shuffle(x_zero,x_zero,msk_64_eq));
// check Denormalized input
vec_int4 cnt_zero = (vec_int4)spu_cntlz(x_body);
vec_uint4 is_den = (vec_uint4)spu_cmpgt(cnt_zero, 11); // Denormalized data 000XXXXX XXXXXXXX
is_den = spu_shuffle( is_den, is_den, splat_msb);
is_den = spu_sel(is_den, (vec_uint4)zeros, x_zero); // exclude zero from denormalized
// count leading zero bits across the 64-bit value
vec_uint4 cnt_ex = (vec_uint4)spu_cmpgt(cnt_zero, 31); // Denormalized data 00000000 XXXXXXXX
vec_int4 cnt_z = spu_shuffle( cnt_zero, cnt_zero, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}));
cnt_zero = spu_add(cnt_zero, spu_sel(zeros, cnt_z, cnt_ex));
cnt_zero = spu_shuffle(cnt_zero, cnt_zero, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
// extract each 64bit data
x_body = spu_and( (vec_uint4)x, ((vec_uint4){0x000FFFFF,-1,0x000FFFFF,-1}));
vec_uint4 mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)exp_in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
// set max shift count
vec_int4 sht = spu_add( cnt_zero, ((vec_int4){-11,-64,-11,-64}));
// denorm & exp+ shift left
vec_uint4 cmp = spu_cmpgt( sht, exp_in);
vec_int4 sht_l = spu_sel(sht, exp_in, cmp);
int shtl0 = spu_extract(sht_l, 0);
int shtl1 = spu_extract(sht_l, 2);
vec_uint4 mant0l = spu_slqwbytebc( spu_slqw(mant0, shtl0), shtl0 );
vec_uint4 mant1l = spu_slqwbytebc( spu_slqw(mant1, shtl1), shtl1 );
vec_int4 expp = spu_shuffle(spu_sub(exp_in, sht_l), zeros, ((vec_uchar16){0,1,2,3,0,1,2,3,8,9,10,11,8,9,10,11}));
exp0 = spu_sel( expp, exp_in, sign ); // select plus or minus calculation
vec_uint4 mantl = spu_shuffle( mant0l, mant1l, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
vec_uint4 mant = spu_sel( mantl, (vec_uint4)x, sign);
exp = spu_sel( exp_in, exp0, is_den ); // select denormalized
x = (vec_double2)spu_sel( (vec_uint4)x, mant, is_den);
//////////////////////////////////////////////////////////////////////////
// from ldexpf4
vec_int4 expmask = ((vec_int4){0x7FF00000, 0, 0x7FF00000, 0});
e1 = spu_and((vec_int4)x, expmask);
e2 = spu_rlmask(e1,-20);
vec_uchar16 maxmask = (vec_uchar16)spu_cmpgt(exp, 2046);
vec_uchar16 minmask = (vec_uchar16)spu_cmpgt(spu_splats(-2044), exp);
minmask = spu_or (minmask, (vec_uchar16)x_zero);
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, (vec_uchar16)spu_cmpgt(esum, 2046));
maxmask = spu_shuffle(maxmask, maxmask, splat_msb);
// maxmask = spu_and(maxmask, ((vec_uchar16)spu_splats((long long)0x7FFFFFFFFFFFFFFFLL)));
minmask = spu_or (minmask, (vec_uchar16)spu_cmpgt(zeros, esum));
minmask = spu_shuffle(minmask, minmask, splat_msb);
// check denorm
vec_uint4 mxmask = spu_and(spu_cmpgt(e2, 0), ((vec_uint4){0x00100000,0,0x00100000,0})); // not denorm
vec_int4 esum2 = spu_sub(esum, (vec_int4)spu_rlmask(mxmask, -20)); // reverse to norm
vec_uint4 mrange = spu_and(spu_cmpgt(zeros, esum2), spu_cmpgt(esum2, -55)); // denorm range
mrange = spu_shuffle(mrange, mrange, splat_msb);
vec_int4 sht_r = spu_sel(spu_splats(-54), esum2, spu_cmpgt(esum2, spu_splats(-54)) );
vec_int4 sht_rh = spu_add( sht_r, ((vec_int4){7,7,7,7}));
x_body = spu_or( x_body, mxmask );
mant0 = spu_shuffle(x_body, x_body, ((vec_uchar16){0,1, 2, 3, 4, 5, 6, 7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
mant1 = spu_shuffle(x_body, x_body, ((vec_uchar16){8,9,10,11,12,13,14,15,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 mant0r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant0, spu_extract(sht_r, 0)), spu_extract(sht_rh,0) );
vec_uint4 mant1r = spu_rlmaskqwbytebc( spu_rlmaskqw(mant1, spu_extract(sht_r, 2)), spu_extract(sht_rh,2) );
#ifdef LDEXPD2_ROUND
// check current round mode
fpscr = spu_shuffle(fpscr2, fpscr2, ((vec_uchar16){0x80,0x80,0x80,0x80,0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
fpscr0 = spu_and(fpscr, ((vec_uint4){0,0xc00,0,0}));
fpscr1 = spu_and(fpscr, ((vec_uint4){0,0x300,0,0}));
// prepare round data
vec_uint4 rnd0 = spu_slqwbytebc( spu_slqw( mant0r, 31), 31);
vec_uint4 rnd1 = spu_slqwbytebc( spu_slqw( mant1r, 31), 31);
vec_uint4 rnd0w = (vec_uint4)spu_cntb( (vec_uchar16)rnd0 );
vec_uint4 rnd1w = (vec_uint4)spu_cntb( (vec_uchar16)rnd1 );
rnd0w = spu_or( spu_slqwbyte(rnd0w,4), spu_slqwbyte(rnd0w,8));
rnd1w = spu_or( spu_slqwbyte(rnd1w,4), spu_slqwbyte(rnd1w,8));
rnd0 = spu_or( rnd0, rnd0w);
rnd1 = spu_or( rnd1, rnd1w);
// nearest
// check half
vec_uint4 hit0 = spu_cmpeq(rnd0, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 hit1 = spu_cmpeq(rnd1, ((vec_uint4){0,0xc0000000,0,0})); //odd + round out
vec_uint4 add0 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit0);
vec_uint4 add1 = spu_sel((vec_uint4)zeros, ((vec_uint4){0,1,0,0}), hit1);
// check greater than half
rnd0 = spu_and( rnd0, ((vec_uint4){0,0x7FFFFFFF,0,0}));
rnd1 = spu_and( rnd1, ((vec_uint4){0,0x7FFFFFFF,0,0}));
hit0 = spu_cmpgt(rnd0, ((vec_uint4){0,0x40000000,0,0}));
hit1 = spu_cmpgt(rnd1, ((vec_uint4){0,0x40000000,0,0}));
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// select if fp0
add0 = spu_sel((vec_uint4)zeros, add0, spu_cmpeq(fpscr0, (vec_uint4)zeros));
add1 = spu_sel((vec_uint4)zeros, add1, spu_cmpeq(fpscr1, (vec_uint4)zeros));
// toward zero do nothing
// upward
sign = spu_rlmaska((vec_uint4)in, -31);
vec_uint4 sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,0,0,0,0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 sign1 = spu_shuffle(sign, sign, ((vec_uchar16){0x80,0x80,0x80,0x80,8,8,8,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80}));
vec_uint4 hit0w = spu_cmpgt(rnd0, ((vec_uint4){0,0,0,0}));
vec_uint4 hit1w = spu_cmpgt(rnd1, ((vec_uint4){0,0,0,0}));
hit0 = spu_andc(hit0w, sign0);
hit1 = spu_andc(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0x800,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x200,0,0})));
// select if fp2
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// downward
hit0 = spu_and(hit0w, sign0);
hit1 = spu_and(hit1w, sign1);
hit0 = spu_and(hit0, spu_cmpeq(fpscr0, ((vec_uint4){0,0xc00,0,0})));
hit1 = spu_and(hit1, spu_cmpeq(fpscr1, ((vec_uint4){0,0x300,0,0})));
// select if fp3
add0 = spu_sel(add0, ((vec_uint4){0,1,0,0}), hit0);
add1 = spu_sel(add1, ((vec_uint4){0,1,0,0}), hit1);
// calc round
mant0r = spu_addx(mant0r, add0, spu_rlqwbyte(spu_genc(mant0r, add0), 4));
mant1r = spu_addx(mant1r, add1, spu_rlqwbyte(spu_genc(mant1r, add1), 4));
#endif // LDEXPD2_ROUND
vec_uint4 mantr = spu_shuffle( mant0r, mant1r, ((vec_uchar16){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}));
// select right answer
x = spu_sel(x, (vec_double2)spu_sl(esum,20), (vec_ullong2)expmask);
x = spu_sel(x, (vec_double2)zeros, (vec_ullong2)minmask);
x = spu_sel(x, (vec_double2)spu_splats((long long)0x7FEFFFFFFFFFFFFFLL), (vec_ullong2)maxmask);
out = (vec_double2)spu_sel((vec_uint4)x , mantr, mrange);
// check Infinity,NaN
vec_uint4 is_inf = spu_cmpeq(e1, expmask);
is_inf = spu_and( is_inf, spu_shuffle(is_inf,is_inf,msk_64_eq));
out = (vec_double2)spu_sel((vec_uint4)out , (vec_uint4)in, is_inf);
out = spu_sel(out, in, (vec_ullong2)signmask);
return out;
}
#endif
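Note: setting aside the SPU-specific denormal and rounding-mode paths above, the per-lane contract is that of ldexp() with the exponent argument clamped to a range where larger or smaller values cannot change the outcome. A scalar sketch (helper name illustrative):

#include <math.h>

/* One double lane: scale x by 2^ex; exponents beyond [-2099, 2098]
 * already guarantee underflow/overflow, so clamping them is harmless. */
static double ldexpd_slot(double x, long long ex)
{
  if (ex >  2098) ex =  2098;
  if (ex < -2099) ex = -2099;
  return ldexp(x, (int)ex);
}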

View File

@@ -0,0 +1,62 @@
/* ldexpf4
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPF4_H___
#define ___SIMD_MATH_LDEXPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_ldexpf4 (vector float x, vector signed int exp)
{
vec_int4 zeros = spu_splats(0);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 e1 = spu_and((vec_int4)x, (vec_int4)expmask);
vec_int4 e2 = spu_rlmask(e1,-23);
vec_uint4 maxmask = spu_cmpgt(exp, 255);
vec_uint4 minmask = spu_cmpgt(spu_splats(-255), exp);
minmask = spu_or (minmask, spu_cmpeq(x, (vec_float4)zeros));
vec_int4 esum = spu_add(e2, exp);
maxmask = spu_or (maxmask, spu_cmpgt(esum, 255));
maxmask = spu_and(maxmask, spu_splats(0x7FFFFFFFU));
minmask = spu_or (minmask, spu_cmpgt(zeros, esum));
x = spu_sel(x, (vec_float4)spu_sl(esum,23), expmask);
x = spu_sel(x, (vec_float4)zeros, minmask);
//x = spu_sel(x, (vec_float4)spu_splats((int)0xFFFFFFFF), maxmask);
x = spu_sel(x, (vec_float4)maxmask, maxmask);
return x;
}
#endif
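Note: in SPU single precision there are no infinities or denormals, so the code adds exp into the biased exponent field and saturates: underflow (or a zero input) selects zero, overflow selects the largest representable magnitude. A scalar sketch of that per-lane behavior (helper name illustrative; a model, not a bit-exact copy):

#include <stdint.h>
#include <string.h>

/* One float lane under SPU rules: flush on underflow, saturate the
 * magnitude bits on overflow, otherwise rewrite the exponent field. */
static float ldexpf_slot(float x, int exp)
{
  uint32_t bits;
  memcpy(&bits, &x, sizeof bits);
  int esum = (int)((bits >> 23) & 0xff) + exp;
  if (exp < -255 || esum <= 0 || (bits & 0x7fffffffu) == 0)
    bits = 0;                    /* flush to zero */
  else if (exp > 255 || esum > 255)
    bits |= 0x7fffffffu;         /* saturate magnitude, keep sign */
  else
    bits = (bits & 0x807fffffu) | ((uint32_t)esum << 23);
  memcpy(&x, &bits, sizeof bits);
  return x;
}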

View File

@@ -0,0 +1,50 @@
/* llabsi2 - returns absolute value of input.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLABSI2_H___
#define ___SIMD_MATH_LLABSI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed long long
_llabsi2 (vector signed long long in)
{
vec_uint4 sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, ((vec_uchar16){ 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
vec_uint4 add_1 = ((vec_uint4){0,1,0,1});
vec_uint4 res = spu_nor((vec_uint4)in, (vec_uint4)in);
res = spu_addx( res, add_1, spu_slqwbyte(spu_genc(res, add_1), 4));
res = spu_sel( (vec_uint4)in, res, sign);
return ((vec_llong2)(res));
}
#endif
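Note: without a 64-bit subtract, the negation above is built from a bitwise NOT (spu_nor of the value with itself) plus an add-with-carry of 1 across the two 32-bit halves. Scalar model of one slot (helper name illustrative):

#include <stdint.h>

/* One slot: two's-complement negation as one's complement plus one,
 * applied only when the sign bit is set. */
static int64_t llabsi_slot(int64_t in)
{
  return (in < 0) ? (int64_t)(~(uint64_t)in + 1u) : in;
}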

View File

@@ -0,0 +1,85 @@
/* lldivi2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVI2_H___
#define ___SIMD_MATH_LLDIVI2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_lldiv.h>
#include <simdmath/lldivu2.h>
static inline vector signed long long
__lldivi2_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xc0,0xc0,0xc0,0xc0, 12,13,14,15, 0xc0,0xc0,0xc0,0xc0}));
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
// lldivi2 - for each of two signed long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivi2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline lldivi2_t
_lldivi2 (vector signed long long numer, vector signed long long denom)
{
lldivi2_t res;
lldivu2_t resAbs;
vec_ullong2 numerAbs, denomAbs;
vec_uint4 numerPos, denomPos, quotNeg;
// Determine whether result needs sign change
numerPos = spu_cmpgt((vec_int4)numer, -1);
numerPos = spu_shuffle(numerPos, numerPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
denomPos = spu_cmpgt((vec_int4)denom, -1);
denomPos = spu_shuffle(denomPos, denomPos, ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}));
quotNeg = spu_xor( numerPos, denomPos );
// Use absolute values of numerator, denominator
numerAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(numer), numer, (vec_ullong2)numerPos);
denomAbs = (vec_ullong2)spu_sel(__lldivi2_negatell2(denom), denom, (vec_ullong2)denomPos);
// Divide the magnitudes with the unsigned routine.
resAbs = _lldivu2(numerAbs, denomAbs);
res.quot = spu_sel((vec_llong2)resAbs.quot, __lldivi2_negatell2((vec_llong2)resAbs.quot),
(vec_ullong2)quotNeg);
res.rem = spu_sel(__lldivi2_negatell2((vec_llong2)resAbs.rem), (vec_llong2)resAbs.rem,
(vec_ullong2)numerPos);
return res;
}
#endif
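Note: the sign bookkeeping follows the C truncating-division convention: the quotient is negated when the operand signs differ and the remainder takes the numerator's sign. A scalar model of one slot (type and helper names illustrative):

#include <stdint.h>

typedef struct { int64_t quot, rem; } lldivi_slot_t;

/* One slot, mirroring the select logic above, including the
 * divide-by-zero convention (quot = 0, rem = numerator). */
static lldivi_slot_t lldivi_slot(int64_t numer, int64_t denom)
{
  uint64_t na = (numer < 0) ? -(uint64_t)numer : (uint64_t)numer;
  uint64_t da = (denom < 0) ? -(uint64_t)denom : (uint64_t)denom;
  uint64_t q  = da ? na / da : 0;
  uint64_t m  = da ? na % da : na;
  lldivi_slot_t r;
  r.quot = ((numer < 0) != (denom < 0)) ? -(int64_t)q : (int64_t)q;
  r.rem  = (numer < 0) ? -(int64_t)m : (int64_t)m;
  return r;
}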

View File

@@ -0,0 +1,104 @@
/* lldivu2 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLDIVU2_H___
#define ___SIMD_MATH_LLDIVU2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_lldiv.h>
// lldivu2 - for each of two unsigned long long integer slots, compute quotient and remainder of
// numer/denom and store in lldivu2_t struct. Divide by zero produces quotient = 0, remainder = numerator.
static inline lldivu2_t
_lldivu2 (vector unsigned long long numer, vector unsigned long long denom)
{
lldivu2_t res;
vec_uint4 denomZeros, numerZeros;
vec_int4 shift;
vec_ullong2 denomShifted, oneShifted, denomLeft, oneLeft;
vec_ullong2 quot, newQuot;
vec_ullong2 newNum, skip, cont;
int anyCont;
// Get difference of leading zeros.
denomZeros = (vec_uint4)__ll_spu_cntlz( denom );
numerZeros = (vec_uint4)__ll_spu_cntlz( numer );
shift = (vec_int4)spu_sub( denomZeros, numerZeros );
// Shift denom to align leading one with numerator's
denomShifted = __ll_spu_sl( denom, (vec_ullong2)shift );
oneShifted = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)shift );
oneShifted = spu_sel( oneShifted, spu_splats(0ull), __ll_spu_cmpeq_zero( denom ) );
// Shift left all leading zeros.
denomLeft = __ll_spu_sl( denom, (vec_ullong2)denomZeros );
oneLeft = __ll_spu_sl( spu_splats(1ull), (vec_ullong2)denomZeros );
quot = spu_splats(0ull);
do
{
cont = __ll_spu_cmpgt( oneShifted, spu_splats(0ull) );
anyCont = spu_extract( spu_gather((vec_uint4)cont ), 0 );
newQuot = spu_or( quot, oneShifted );
// Subtract shifted denominator from remaining numerator
// when denominator is not greater.
skip = __ll_spu_cmpgt( denomShifted, numer );
newNum = __ll_spu_sub( numer, denomShifted );
// If denominator is greater, next shift is one more, otherwise
// next shift is number of leading zeros of remaining numerator.
numerZeros = (vec_uint4)spu_sel( __ll_spu_cntlz( newNum ), (vec_ullong2)numerZeros, skip );
shift = (vec_int4)spu_sub( (vec_uint4)skip, numerZeros );
oneShifted = __ll_spu_rlmask( oneLeft, (vec_ullong2)shift );
denomShifted = __ll_spu_rlmask( denomLeft, (vec_ullong2)shift );
quot = spu_sel( newQuot, quot, skip );
numer = spu_sel( newNum, numer, spu_orc(skip,cont) );
}
while ( anyCont );
res.quot = quot;
res.rem = numer;
return res;
}
#endif
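Note: the loop above is classic binary long division: align the divisor's leading one under the dividend's, then subtract and shift right until the quotient bit falls off. A scalar model of one slot (helper name illustrative; __builtin_clzll is the GCC builtin):

#include <stdint.h>

/* One slot: shift-and-subtract division with the divide-by-zero
 * convention quot = 0, rem = numerator. */
static void lldivu_slot(uint64_t numer, uint64_t denom,
                        uint64_t *quot, uint64_t *rem)
{
  uint64_t q = 0;
  if (denom != 0 && numer >= denom) {
    int shift    = __builtin_clzll(denom) - __builtin_clzll(numer);
    uint64_t d   = denom << shift;   /* align leading ones */
    uint64_t bit = 1ull << shift;    /* quotient bit being tried */
    while (bit != 0) {
      if (numer >= d) { numer -= d; q |= bit; }
      d   >>= 1;
      bit >>= 1;
    }
  }
  *quot = q;
  *rem  = numer;
}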

View File

@@ -0,0 +1,115 @@
/* llrintd2 - rounds two doubles to the two nearest 64-bit integers,
   consistent with the current rounding mode.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTD2_H___
#define ___SIMD_MATH_LLRINTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions
// Overflow returns unspecified data
static inline vector signed long long
_llrintd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = ((vec_uchar16){0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8});
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow;
vec_uint4 implied_one = ((vec_uint4){ 0, 0, 0x00100000, 0});
vec_uint4 exp_mask = ((vec_uint4){-1,-1, 0xFFF00000, 0});
vec_double2 bias;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
// check denormalized
vec_uint4 exp_in = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp_in, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Round the input according to the current rounding mode.
*/
vec_uint4 is_large = spu_cmpgt( exp_in, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
bias = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
bias = spu_sel(bias, xx, (vec_ullong2)spu_splats(0x8000000000000000ULL));
// bias = spu_sel((vec_double2)((vec_ullong2)spu_splats(0x4330000000000000ULL)), xx,
// (vec_ullong2)spu_splats(0x8000000000000000ULL));
mant = (vec_uint4)(spu_sub(spu_add(xx, bias), bias));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)mant, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte(mant, -8), implied_one, exp_mask);
mant1 = spu_sel(mant, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)xx, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
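Note: the bias constant 0x4330000000000000 is 2^52; adding it (with the sign matched to the input) and subtracting it back forces the hardware to drop the fraction bits in the current rounding mode. A scalar model of that step (helper name illustrative; assumes the compiler does not reassociate the adds):

#include <math.h>

/* Round-to-integral in the current rounding mode via the 2^52 trick. */
static double rint_via_bias(double x)
{
  const double bias = 0x1.0p52;      /* 0x4330000000000000 */
  if (fabs(x) >= bias) return x;     /* already integral */
  return (x < 0.0) ? (x - bias) + bias : (x + bias) - bias;
}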

View File

@@ -0,0 +1,107 @@
/* llrintf4 - rounds four floats to the four nearest 64-bit integers.
   On the SPU the rounding mode for floats is always toward 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLRINTF4_H___
#define ___SIMD_MATH_LLRINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions
// Overflow returns unspecified data
static inline llroundf4_t
_llrintf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
#endif
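Note: since SPU single precision always rounds toward zero, rint on floats is plain truncation; the per-lane contract is just a C cast (helper name illustrative):

/* One float lane: C integer conversion truncates toward zero, which
 * matches the SPU's only single-precision rounding mode. */
static long long llrintf_slot(float x)
{
  return (long long)x;   /* overflow is unspecified, as above */
}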

View File

@@ -0,0 +1,97 @@
/* llroundd2 - rounds two doubles to the two nearest 64-bit integers.
   Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDD2_H___
#define ___SIMD_MATH_LLROUNDD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions
// Overflow returns unspecified data
static inline vector signed long long
_llroundd2 (vector double in)
{
int shift0, shift1;
vec_uchar16 splat_msb = { 0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8};
vec_int4 exp;
vec_uint4 mant, mant0, mant1, sign, mask, borrow, addend;
vec_uint4 implied_one = { 0, 0, 0x00100000, 0};
vec_uint4 exp_mask = { -1, -1,0xFFF00000, 0};
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -20), 0x7FF);
exp = spu_add(exp, -1011);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 2);
mask = spu_cmpgt(exp, 0);
mask = spu_shuffle(mask, mask, splat_msb);
/* Align mantissa bits
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -8), implied_one, exp_mask);
mant1 = spu_sel((vec_uint4)in, implied_one, exp_mask);
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant = spu_shuffle(mant0, mant1, ((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
mant = spu_and(mant, mask);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend = spu_rlmask(addend, -7);
// addend = spu_and(spu_rlqw(mant, 1), ((vec_uint4){ 0,1,0,1}));
mant = spu_addx(mant, addend, spu_rlqwbyte(spu_genc(mant, addend), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign = spu_shuffle(sign, sign, splat_msb);
mant = spu_xor(mant, sign);
borrow = spu_genb(mant, sign);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){
4,5,6,7, 192,192,192,192,
12,13,14,15, 192,192,192,192}));
mant = spu_subx(mant, sign, borrow);
return ((vec_llong2)(mant));
}
#endif
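Note: the 'addend' extracted above is the top fraction bit, so adding it to the aligned mantissa realizes round-half-away-from-zero on the magnitude. A scalar model of one slot (helper name illustrative):

#include <math.h>

/* One double slot: round to nearest, halfway cases away from zero. */
static long long llroundd_slot(double x)
{
  double m = fabs(x);
  long long r = (long long)(m + 0.5);  /* truncate after adding 0.5 */
  return (x < 0.0) ? -r : r;
}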

View File

@@ -0,0 +1,120 @@
/* llroundf4 - rounds four floats to the four nearest 64-bit integers.
   Halfway cases (0.5) are rounded away from 0.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LLROUNDF4_H___
#define ___SIMD_MATH_LLROUNDF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
//
// Raises no exceptions
// Overflow returns unspecified data
static inline llroundf4_t
_llroundf4 (vector float in)
{
llroundf4_t res;
vec_int4 exp;
vec_uint4 mant0, mant1, mant2, mant3;
vec_uint4 mask, mask0, mask1;
vec_uint4 sign, sign0, sign1;
vec_uint4 addend0, addend1;
vec_uint4 borrow0, borrow1;
vec_uint4 res0, res1;
int shift0, shift1, shift2, shift3;
/* Place mantissa bits (including implied most significant
* bit) into the most significant bits of element 3. Elements
* 0, 1, and 2 are zeroed.
*/
mant0 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in,-11), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant1 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -7), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant2 = spu_sel(spu_rlmaskqwbyte((vec_uint4)in, -3), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
mant3 = spu_sel( spu_rlqwbyte((vec_uint4)in, 1), ((vec_uint4){0, 0, 0, 0x80000000}), ((vec_uint4){-1, -1, -1, 0x800000FF}));
/* Determine how many bits to shift the mantissa to correctly
* align it into long long element 0.
*/
exp = spu_and(spu_rlmask((vec_int4)in, -23), 0xFF);
exp = spu_add(exp, -94);
shift0 = spu_extract(exp, 0);
shift1 = spu_extract(exp, 1);
shift2 = spu_extract(exp, 2);
shift3 = spu_extract(exp, 3);
/* Align mantissa bits
*/
mant0 = spu_slqwbytebc(spu_slqw(mant0, shift0), shift0);
mant1 = spu_slqwbytebc(spu_slqw(mant1, shift1), shift1);
mant2 = spu_slqwbytebc(spu_slqw(mant2, shift2), shift2);
mant3 = spu_slqwbytebc(spu_slqw(mant3, shift3), shift3);
mask = spu_cmpgt(exp, 0);
mask0 = spu_shuffle(mask, mask, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
mask1 = spu_shuffle(mask, mask, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_shuffle(mant0, mant1,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res1 = spu_shuffle(mant2, mant3,((vec_uchar16){0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23}));
res0 = spu_and(res0, mask0);
res1 = spu_and(res1, mask1);
/* Perform round by adding 1 if the fraction bits are
* greater than or equal to .5
*/
addend0 = spu_shuffle(mant0, mant1, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend1 = spu_shuffle(mant2, mant3, ((vec_uchar16){0x80,0x80,0x80,0x80,0x80,0x80,0x80,8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,24}));
addend0 = spu_rlmask(addend0, -7);
addend1 = spu_rlmask(addend1, -7);
// addend0 = spu_and(spu_rlqw(res0, 1), ((vec_uint4){ 0,1,0,1}));
// addend1 = spu_and(spu_rlqw(res1, 1), ((vec_uint4){ 0,1,0,1}));
res0 = spu_addx(res0, addend0, spu_rlqwbyte(spu_genc(res0, addend0), 4));
res1 = spu_addx(res1, addend1, spu_rlqwbyte(spu_genc(res1, addend1), 4));
/* Compute the two's complement of the mantissa if the
* input is negative.
*/
sign = (vec_uint4)spu_rlmaska((vec_int4)in, -31);
sign0 = spu_shuffle(sign, sign, ((vec_uchar16){0,0,0,0,0,0,0,0, 4, 4, 4, 4, 4, 4, 4, 4}));
sign1 = spu_shuffle(sign, sign, ((vec_uchar16){8,8,8,8,8,8,8,8, 12,12,12,12,12,12,12,12}));
res0 = spu_xor(res0, sign0);
res1 = spu_xor(res1, sign1);
borrow0 = spu_genb(res0, sign0);
borrow1 = spu_genb(res1, sign1);
borrow0 = spu_shuffle(borrow0, borrow0, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
borrow1 = spu_shuffle(borrow1, borrow1, ((vec_uchar16){4,5,6,7,0xc0,0xc0,0xc0,0xc0, 12,13,14,15,0xc0,0xc0,0xc0,0xc0}));
res.vll[0] = (vec_llong2)spu_subx(res0, sign0, borrow0);
res.vll[1] = (vec_llong2)spu_subx(res1, sign1, borrow1);
return res;
}
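/* Illustrative usage sketch (not part of the original library; the guard
* macro below is hypothetical). Each float slot is rounded to the nearest
* long long, halfway cases away from zero.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline void _llroundf4_example(void)
{
vector float in = (vector float){0.5f, -0.5f, 2.4f, -2.5f};
llroundf4_t r = _llroundf4(in);
// r.vll[0] holds slots 0 and 1 -> {1, -1}; r.vll[1] holds slots 2 and 3 -> {2, -3}.
(void)r;
}
#endif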
#endif

View File

@@ -0,0 +1,83 @@
/* log10f4 - for each of four float slots, compute the base-10 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG10F4_H___
#define ___SIMD_MATH_LOG10F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#define __LOG10F_loga2msb 0.3010299205780f
#define __LOG10F_loga2lsb 7.5085978266e-8f
#define __LOG10F_logaemsb 0.4342944622040f
#define __LOG10F_logaelsb 1.9699272335e-8f
#define __LOG10F_logae 0.4342944819033f
#define __LOG10F_c0 0.2988439998f
#define __LOG10F_c1 0.3997655209f
#define __LOG10F_c2 0.6666679125f
static inline vector float
_log10f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = _divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(__LOG10F_c0), w, spu_splats(__LOG10F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG10F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(__LOG10F_logaemsb), x1,
spu_mul(spu_splats(__LOG10F_loga2msb),wnew));
vec_float4 zz2 = spu_madd(spu_splats(__LOG10F_logaelsb), x1,
spu_madd(spu_splats(__LOG10F_loga2lsb), wnew,
spu_mul(spu_splats(__LOG10F_logae), yneg))
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif

View File

@@ -0,0 +1,60 @@
/* log1pf4 - for each of four float slots, compute the natural log of 1 + x.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG1PF4_H___
#define ___SIMD_MATH_LOG1PF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/logf4.h>
#include <simdmath/divf4.h>
static inline vector float
_log1pf4 (vector float x)
{
vec_uint4 nearzeromask = spu_and(spu_cmpgt(x, spu_splats(-0.5f)),
spu_cmpgt(spu_splats(0.5f), x));
vec_float4 x2 = spu_mul(x,x);
vec_float4 d0, d1, n0, n1;
d0 = spu_madd(x , spu_splats(1.5934420741f), spu_splats(0.8952856868f));
d1 = spu_madd(x , spu_splats(0.1198195734f), spu_splats(0.8377145063f));
d1 = spu_madd(x2, d1, d0);
n0 = spu_madd(x , spu_splats(1.1457993413f), spu_splats(0.8952856678f));
n1 = spu_madd(x , spu_splats(0.0082862580f), spu_splats(0.3394238808f));
n1 = spu_madd(x2, n1, n0);
return spu_sel(_logf4(spu_add(x, spu_splats(1.0f))),
spu_mul(x, _divf4(n1, d1)),
nearzeromask);
}
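/* Illustrative note (a sketch, not part of the original file): for |x| < 0.5
* the rational approximation above avoids the cancellation that makes
* _logf4(spu_add(x, spu_splats(1.0f))) inaccurate for tiny x. With x = 1e-8f,
* 1.0f + x rounds to exactly 1.0f, so the logf4 path would return 0 instead
* of approximately 1e-8.
*/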
#endif

View File

@@ -0,0 +1,78 @@
/* log2f4 - for each of four float slots, compute the base-2 logarithm.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG2F4_H___
#define ___SIMD_MATH_LOG2F4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#define __LOG2F_l2emsb 1.4426950216293f
#define __LOG2F_l2elsb 1.9259629911e-8f
#define __LOG2F_l2e 1.4426950408890f
#define __LOG2F_c0 0.2988439998f
#define __LOG2F_c1 0.3997655209f
#define __LOG2F_c2 0.6666679125f
static inline vector float
_log2f4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_int4 expmask = spu_splats((int)0x7F800000);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), (vec_uint4)expmask);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = _divf4(x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(__LOG2F_c0), w, spu_splats(__LOG2F_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOG2F_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 zz1 = spu_madd(spu_splats(__LOG2F_l2emsb), x1, spu_convtf(xexp,0));
vec_float4 zz2 = spu_madd(spu_splats(__LOG2F_l2elsb), x1,
spu_mul(spu_splats(__LOG2F_l2e), yneg)
);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif

View File

@@ -0,0 +1,86 @@
/* logbd2 - for each element of vector x, return the exponent of normalized double x' as floating point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBD2_H___
#define ___SIMD_MATH_LOGBD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector double
_logbd2 (vector double x)
{
vec_uchar16 even = (vec_uchar16)(vec_uint4){ 0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b };
vec_uchar16 odd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f };
vec_uchar16 swapEvenOdd = (vec_uchar16)(vec_uint4){ 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b };
vec_ullong2 sign = spu_splats(0x8000000000000000ull);
vec_ullong2 expn = spu_splats(0x7ff0000000000000ull);
vec_ullong2 zero = spu_splats(0x0000000000000000ull);
vec_ullong2 isnan, isinf, iszero;
vec_double2 logb = (vec_double2)zero;
vec_llong2 e1, e2;
vec_uint4 cmpgt, cmpeq, cmpzr;
vec_int4 lz, lz0, lz1;
//NAN: x is NaN (all-ones exponent and non-zero mantissa)
cmpgt = spu_cmpgt( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
cmpeq = spu_cmpeq( (vec_uint4)spu_or( (vec_ullong2)x, sign ), (vec_uint4)spu_or(sign, expn) );
isnan = (vec_ullong2)spu_or( spu_shuffle( cmpgt, cmpgt, even ),
spu_and( spu_shuffle( cmpeq, cmpeq, even ),
spu_shuffle( cmpgt, cmpgt, odd ) ) );
logb = spu_sel( logb, (vec_double2)spu_splats(0x7FF8000000000000ll), isnan );
//INF: x is infinite (all-ones exponent and zero mantissa)
isinf = (vec_ullong2)spu_and( cmpeq, spu_shuffle( cmpeq, cmpeq, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(__builtin_huge_val()), isinf );
//-HUGE_VAL: x is zero (zero exponent and zero mantissa)
cmpzr = spu_cmpeq( (vec_uint4)spu_andc( (vec_ullong2)x, sign ), (vec_uint4)zero );
iszero = (vec_ullong2)spu_and( cmpzr, spu_shuffle( cmpzr, cmpzr, swapEvenOdd ) );
logb = spu_sel( logb, (vec_double2)spu_splats(-__builtin_huge_val()), iszero );
//Integer Exponent: if x is normal or subnormal, return unbiased exponent of normalized double x
e1 = (vec_llong2)spu_and( (vec_llong2)x, (vec_llong2)expn );
e2 = (vec_llong2)spu_rlmask((vec_uint4)e1, -20);
lz = (vec_int4)spu_cntlz( (vec_uint4)spu_andc( (vec_ullong2)x, sign) );
lz0 = (vec_int4)spu_shuffle( lz, lz, even );
lz0 = spu_sel( (vec_int4)zero, spu_sub( lz0, spu_splats((int)12) ), spu_cmpgt( lz0, (int)11 ) );
lz1 = spu_sel( (vec_int4)zero, spu_shuffle( lz, lz, odd), spu_cmpeq( lz0, (int)20 ) );
logb = spu_sel( logb, spu_extend( spu_convtf( spu_sub( spu_sub( (vec_int4)e2, spu_splats((int)1023) ), spu_add( lz0, lz1 ) ), 0 ) ),
spu_nor( isnan, spu_or( isinf, iszero ) ) );
return logb;
}
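/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): logb returns the unbiased exponent as a double.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector double _logbd2_example(void)
{
// 8.0 = 1.0 * 2^3 and 0.375 = 1.5 * 2^-2, so the result is {3.0, -2.0}.
return _logbd2((vector double){8.0, 0.375});
}
#endif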
#endif

View File

@@ -0,0 +1,46 @@
/* logbf4 - for each element of vector x, return the exponent of x' as floating point value
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBF4_H___
#define ___SIMD_MATH_LOGBF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <math.h>
static inline vector float
_logbf4 (vector float x)
{
// Isolate the biased exponent field.
vec_int4 e1 = spu_and((vec_int4)x, spu_splats((int)0x7F800000));
vec_uint4 zeromask = spu_cmpeq(e1, 0);
// Subtract the bias (127 << 23); spu_convtf with scale 23 then divides by
// 2^23, yielding the unbiased exponent as a float.
e1 = spu_sub(e1, spu_splats((int)0x3F800000));
return spu_sel(spu_convtf(e1,23), (vec_float4)spu_splats(-HUGE_VALF), zeromask);
}
#endif

View File

@@ -0,0 +1,76 @@
/* logf4 - for each of four slots, calculate the natural log
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGF4_H___
#define ___SIMD_MATH_LOGF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#define __LOGF_ln2msb 0.6931470632553f
#define __LOGF_ln2lsb 1.1730463525e-7f
#define __LOGF_c0 0.2988439998f
#define __LOGF_c1 0.3997655209f
#define __LOGF_c2 0.6666679125f
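/* __LOGF_ln2msb + __LOGF_ln2lsb together approximate ln(2) to more than
* single precision; accumulating the msb and lsb products separately
* (zz1 and zz2 below) preserves those extra bits.
*/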
static inline vector float
_logf4 (vector float x)
{
vec_int4 zeros = spu_splats((int)0);
vec_float4 ones = spu_splats(1.0f);
vec_uint4 zeromask = spu_cmpeq(x, (vec_float4)zeros);
vec_uint4 expmask = spu_splats(0x7F800000U);
vec_int4 xexp = spu_add( spu_rlmask(spu_and((vec_int4)x, (vec_int4)expmask), -23), -126 );
x = spu_sel(x, (vec_float4)spu_splats((int)0x3F000000), expmask);
vec_uint4 mask = spu_cmpgt(spu_splats(0.7071067811865f), x);
x = spu_sel(x , spu_add(x, x) , mask);
xexp = spu_sel(xexp, spu_sub(xexp,spu_splats((int)1)), mask);
vec_float4 x1 = spu_sub(x , ones);
vec_float4 z = _divf4 (x1, spu_add(x, ones));
vec_float4 w = spu_mul(z , z);
vec_float4 polyw;
polyw = spu_madd(spu_splats(__LOGF_c0), w, spu_splats(__LOGF_c1));
polyw = spu_madd(polyw , w, spu_splats(__LOGF_c2));
vec_float4 yneg = spu_mul(z, spu_msub(polyw, w, x1));
vec_float4 wnew = spu_convtf(xexp,0);
vec_float4 zz1 = spu_madd(spu_splats(__LOGF_ln2msb), wnew, x1);
vec_float4 zz2 = spu_madd(spu_splats(__LOGF_ln2lsb), wnew, yneg);
return spu_sel(spu_add(zz1,zz2), (vec_float4)zeromask, zeromask);
}
#endif

View File

@@ -0,0 +1,59 @@
/* modfd2 - for each of two double slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_MODFD2_H___
#define ___SIMD_MATH_MODFD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/truncd2.h>
// Returns fractional part and stores integral part in *iptr.
static inline vector double
_modfd2 (vector double x, vector double *iptr)
{
vec_double2 integral, fraction;
vec_uint4 iszero;
vec_uint4 sign = (vec_uint4){0x80000000, 0, 0x80000000, 0};
vec_uchar16 pattern = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
integral = _truncd2( x );
// if integral is zero, then fraction is x.
iszero = spu_cmpeq(spu_andc((vec_uint4)integral, sign), 0);
iszero = spu_and(iszero, spu_shuffle(iszero, iszero, pattern));
fraction = spu_sel(spu_sub( x, integral ), x, (vec_ullong2)iszero);
*iptr = integral;
return fraction;
}
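/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): the integral parts land in *iptr and the
* fractional parts, carrying the sign of x, are returned.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector double _modfd2_example(void)
{
vector double ipart;
vector double frac = _modfd2((vector double){2.75, -1.25}, &ipart);
// ipart = {2.0, -1.0}, frac = {0.75, -0.25}
return frac;
}
#endif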
#endif

View File

@@ -0,0 +1,52 @@
/* modff4 - for each of four float slots, compute fractional and integral parts.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_MODFF4_H___
#define ___SIMD_MATH_MODFF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/truncf4.h>
// Returns fractional part and stores integral part in *iptr.
static inline vector float
_modff4 (vector float x, vector float *iptr)
{
vec_float4 integral, fraction;
integral = _truncf4( x );
fraction = spu_sub( x, integral );
*iptr = integral;
return fraction;
}
#endif

View File

@@ -0,0 +1,76 @@
/* nearbyintd2 - Round the input to the nearest integer according to
the current rounding mode without raising an inexact exception.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEARBYINTD2_H___
#define ___SIMD_MATH_NEARBYINTD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_nearbyintd2(vector double in)
{
vec_uint4 fpscr;
vec_ullong2 sign = ((vec_ullong2){0x8000000000000000ULL,0x8000000000000000ULL});
vec_double2 out, addend;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
fpscr = spu_mffpscr();
// check denormalized
vec_uint4 exp = spu_and( (vec_uint4)in, 0x7FF00000 );
vec_uint4 is_denorm = spu_cmpeq( exp, 0 );
vec_uint4 ofs = spu_and( ((vec_uint4){0x00100000,0,0x00100000,0}), is_denorm);
// check zero
vec_uint4 abs_x = spu_and((vec_uint4)in, ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1}));
vec_uint4 is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox, ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11})));
ofs = spu_sel( ofs, vec_zero, is_zerox);
vec_double2 xx = (vec_double2)spu_or( (vec_uint4)in, ofs );
/* Add 2^52 (0x4330000000000000) and then subtract it to effect a round
* performed by the hardware. Also preserve the input sign so that negative
* inputs that round to zero generate a -0.0.
*/
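/* For example, with x = 5.7 under round-to-nearest: 5.7 + 2^52 rounds to
* 2^52 + 6, and subtracting 2^52 leaves 6.0.
*/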
vec_uint4 is_large = spu_cmpgt( exp, 0x43200000 );
is_large = spu_shuffle(is_large,is_large,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
addend = spu_sel((vec_double2)((vec_ullong2){0x4330000000000000ULL,0x4330000000000000ULL}), ((vec_double2){0.0,0.0}), (vec_ullong2)is_large);
addend = spu_sel(addend, xx, sign);
out = spu_sel(spu_sub(spu_add(xx, addend), addend), xx, sign);
spu_mtfpscr(fpscr);
return (out);
}
#endif

View File

@@ -0,0 +1,55 @@
/* nearbyintf4 - for each of four float slots, round to the nearest integer,
consistent with the current rounding mode,
without raising an inexact floating-point exception.
On SPU, the rounding mode for float is always towards zero.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEARBYINTF4_H___
#define ___SIMD_MATH_NEARBYINTF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_nearbyintf4(vector float x)
{
vector signed int xi;
vector unsigned int inrange;
// Converting to and from signed integer truncates values in the range [-2^31, 2^31).
// No truncation is needed if the exponent is > 22 (the value is already integral).
inrange = spu_cmpabsgt( (vector float)spu_splats(0x4b000000), x );
xi = spu_convts( x, 0 );
return spu_sel( x, spu_convtf( xi, 0 ), inrange );
}
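/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): since float rounding on SPU is towards zero,
* in-range slots are simply truncated.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector float _nearbyintf4_example(void)
{
// Yields {1.0f, -2.0f, 1e20f, 0.0f}; 1e20f exceeds 2^23 and is already integral.
return _nearbyintf4((vector float){1.7f, -2.3f, 1e20f, 0.0f});
}
#endif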
#endif

View File

@@ -0,0 +1,42 @@
/* negated2 - for each of two double slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATED2_H___
#define ___SIMD_MATH_NEGATED2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_negated2 (vector double x)
{
return (vec_double2)spu_xor( (vec_ullong2)x, spu_splats(0x8000000000000000ull) );
}
#endif

View File

@@ -0,0 +1,43 @@
/* negatef4 - for each of four float slots, negate the sign bit.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATEF4_H___
#define ___SIMD_MATH_NEGATEF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_negatef4 (vector float x)
{
return (vec_float4)spu_xor( (vec_uint4)x, spu_splats(0x80000000) );
}
#endif

View File

@@ -0,0 +1,43 @@
/* negatei4 - for each of 4 signed int slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATEI4_H___
#define ___SIMD_MATH_NEGATEI4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed int
_negatei4 (vector signed int x)
{
vector signed int zero = (vector signed int){0,0,0,0};
return spu_sub (zero, x);
}
#endif

View File

@@ -0,0 +1,47 @@
/* negatell2 - for each of 2 signed long long slots, negate the value.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATELL2_H___
#define ___SIMD_MATH_NEGATELL2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector signed long long
_negatell2 (vector signed long long x)
{
vector signed int zero = (vector signed int){0,0,0,0};
vector signed int borrow;
// Generate the per-32-bit-word borrows of 0 - x, then move each low word's
// borrow up to its high word; the 0xC0 shuffle bytes produce 0xFF, so the
// low words see no incoming borrow.
borrow = spu_genb(zero, (vec_int4)x);
borrow = spu_shuffle(borrow, borrow, ((vec_uchar16){4,5,6,7, 0xC0,0xC0,0xC0,0xC0, 12,13,14,15, 0xC0,0xC0,0xC0,0xC0}));
// Complete the 64-bit negation with a subtract-with-borrow.
return (vec_llong2)spu_subx(zero, (vec_int4)x, borrow);
}
#endif

View File

@@ -0,0 +1,97 @@
/* nextafterd2 - find next representable floating-point value towards 2nd param.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEXTAFTERD2_H___
#define ___SIMD_MATH_NEXTAFTERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector double
_nextafterd2 (vector double xx, vector double yy)
{
vec_uint4 abs_x, abs_y, sign_x, abs_dif;
vec_uint4 is_sub, is_zerox, is_zeroy;
vec_uint4 is_equal, is_infy, is_nany;
vec_uint4 res0, res1, res;
vec_uint4 vec_zero = ((vec_uint4){0,0,0,0});
vec_uint4 vec_one = ((vec_uint4){0,1,0,1});
vec_uint4 vec_m1 = ((vec_uint4){0x80000000,1,0x80000000,1});
vec_uint4 msk_exp = ((vec_uint4){0x7FF00000,0,0x7FF00000,0});
vec_uint4 msk_abs = ((vec_uint4){0x7FFFFFFF,-1,0x7FFFFFFF,-1});
vec_uchar16 msk_all_eq = ((vec_uchar16){4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11});
// mask sign bit
abs_x = spu_and( (vec_uint4)xx, msk_abs);
abs_y = spu_and( (vec_uint4)yy, msk_abs);
is_zerox = spu_cmpeq( abs_x, vec_zero);
is_zerox = spu_and( is_zerox, spu_shuffle(is_zerox,is_zerox,msk_all_eq));
// -0 exception
sign_x = spu_and((vec_uint4)xx, ((vec_uint4){0x80000000,0,0x80000000,0}));
sign_x = spu_sel(sign_x, vec_zero, is_zerox);
// if same sign |y| < |x| -> decrease
abs_dif = spu_subx(abs_y, abs_x, spu_rlqwbyte(spu_genb(abs_y, abs_x), 4));
is_sub = spu_xor((vec_uint4)yy, sign_x); // not same sign -> decrease
is_sub = spu_or(is_sub, abs_dif);
is_sub = spu_rlmaska(is_sub, -31);
is_sub = spu_shuffle(is_sub,is_sub,((vec_uchar16){0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8}));
res0 = spu_addx( abs_x, vec_one, spu_rlqwbyte(spu_genc(abs_x,vec_one),4)); // calc increase
res1 = spu_subx( abs_x, vec_one, spu_rlqwbyte(spu_genb(abs_x,vec_one),4)); // calc decrease
res = spu_sel( res0, res1, is_sub); // select increase or decrease
res = spu_or( res, sign_x); // set sign
// check exception
// x == 0 and decreasing -> smallest-magnitude negative value
res = spu_sel(res, vec_m1, spu_and(is_zerox, is_sub));
// check equal (include 0,-0)
is_zeroy = spu_cmpeq( abs_y, vec_zero);
is_zeroy = spu_and( is_zeroy, spu_shuffle(is_zeroy,is_zeroy,msk_all_eq));
is_equal = spu_cmpeq((vec_uint4)xx, (vec_uint4)yy);
is_equal = spu_and(is_equal, spu_shuffle(is_equal,is_equal,msk_all_eq));
is_equal = spu_or(is_equal, spu_and(is_zeroy, is_zerox));
res = spu_sel(res, (vec_uint4)yy, is_equal);
// check nan
is_infy = spu_cmpeq( abs_y, msk_exp);
is_infy = spu_and( is_infy, spu_shuffle(is_infy,is_infy,msk_all_eq));
is_nany = spu_and( abs_y, msk_exp);
is_nany = spu_cmpeq( is_nany, msk_exp);
is_nany = spu_and( is_nany, spu_shuffle(is_nany,is_nany,msk_all_eq));
is_nany = spu_sel( is_nany, vec_zero, is_infy);
res = spu_sel(res, (vec_uint4)yy, is_nany);
return (vec_double2)res;
}
#endif

View File

@@ -0,0 +1,72 @@
/* nextafterf4 - for each of four float slots,
return the next representable value after x in the direction of y;
if x is equal to y, the result is y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEXTAFTERF4_H___
#define ___SIMD_MATH_NEXTAFTERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_nextafterf4(vector float x, vector float y)
{
vec_float4 x_not_dec, lala_inc, lala_dec;
vec_uint4 abs_inc_number, abs_dec_number;
vec_uint4 A, B;
//abs_inc, abs_dec
abs_inc_number = spu_sel(spu_splats((unsigned int)0x800000), spu_add((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_cmpabsgt(x, spu_splats(0.0f)));
abs_dec_number = (vec_uint4)spu_add((vec_float4)spu_sub((vec_uint4)x, spu_splats((unsigned int)0x1)), spu_splats(0.0f));
//x<= y
A= spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in < 0
B= abs_dec_number;
lala_inc = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(spu_splats(0.0f), x));
// in <=0, abs_inc ( if in==0, set result's sign to -)
//A= spu_or(spu_splats((unsigned int)0x80000000), spu_andc(abs_inc_number, spu_splats((unsigned int)0x80000000)));
A= spu_or(abs_inc_number, spu_splats((unsigned int)0x80000000));
// in > 0
B = abs_dec_number;
lala_dec = spu_sel((vec_float4)A, (vec_float4)B, spu_cmpgt(x, spu_splats(0.0f)));
x_not_dec = spu_sel(y, lala_inc, spu_cmpgt(y, x));
// (x <= y) || (x > y)
return spu_sel(x_not_dec, lala_dec, spu_cmpgt(x, y));
}
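/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): stepping up from 1.0f gives the next representable
* single-precision value.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector float _nextafterf4_example(void)
{
// Each slot becomes 0x3F800001 = 1.0f + 2^-23.
return _nextafterf4(spu_splats(1.0f), spu_splats(2.0f));
}
#endif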
#endif

View File

@@ -0,0 +1,60 @@
/* powf4 - for each of four float slots, compute x raised to the power y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_POWF4_H___
#define ___SIMD_MATH_POWF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/exp2f4.h>
#include <simdmath/log2f4.h>
static inline vector float
_powf4 (vector float x, vector float y)
{
vec_int4 zeros = spu_splats((int)0);
vec_uint4 zeromask = spu_cmpeq((vec_float4)zeros, x);
vec_uint4 negmask = spu_cmpgt(spu_splats(0.0f), x);
vec_float4 sbit = (vec_float4)spu_splats((int)0x80000000);
vec_float4 absx = spu_andc(x, sbit);
vec_float4 absy = spu_andc(y, sbit);
vec_uint4 oddy = spu_and(spu_convtu(absy, 0), spu_splats(0x00000001U));
negmask = spu_and(negmask, spu_cmpgt(oddy, (vec_uint4)zeros));
vec_float4 res = _exp2f4(spu_mul(y, _log2f4(absx)));
res = spu_sel(res, spu_or(sbit, res), negmask);
return spu_sel(res, (vec_float4)zeros, zeromask);
}
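/* Illustrative note (a sketch, not part of the original file): the core
* identity above is x^y = 2^(y * log2|x|); the sign bit is restored only
* when x < 0 and y truncates to an odd integer, so e.g. (-2)^3 = -8 while
* (-2)^2 = 4.
*/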
#endif

View File

@@ -0,0 +1,88 @@
/* recipd2 - for each of two double slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_RECIPD2_H___
#define ___SIMD_MATH_RECIPD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/isinfd2.h>
#include <simdmath/is0denormd2.h>
#include <simdmath/isnand2.h>
// Handles exceptional values as follows:
// NaN -> NaN
// (+,-)Inf -> (+,-)0
// (+,-)0 -> (+,-)Inf
// Denormal inputs are treated as zero.
static inline vector double
_recipd2 (vector double x)
{
vec_ullong2 expmask, signmask;
vec_double2 one, man, exp, nexp, y1, y2, y3, zero, inf, result;
vec_float4 onef, manf, y0f, y1f;
expmask = spu_splats(0x7ff0000000000000ull);
signmask = spu_splats(0x8000000000000000ull);
onef = spu_splats(1.0f);
one = spu_extend( onef );
// Factor ( mantissa x 2^exponent ) into ( mantissa x 2 ) and ( 2^(exponent-1) ).
// Invert exponent part with subtraction.
exp = spu_and( x, (vec_double2)expmask );
nexp = (vec_double2)spu_sub( (vec_uint4)expmask, (vec_uint4)exp );
man = spu_sel( x, (vec_double2)spu_splats(0x40000000), expmask );
// Compute mantissa part with single and double precision Newton-Raphson steps.
// Then multiply with 2^(1-exponent).
manf = spu_roundtf( man );
y0f = spu_re( manf );
y1f = spu_madd( spu_nmsub( manf, y0f, onef ), y0f, y0f );
y1 = spu_extend( y1f );
y2 = spu_madd( spu_nmsub( man, y1, one ), y1, y1 );
y3 = spu_madd( spu_nmsub( man, y2, one ), y2, y2 );
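/* Each refinement y' = y + y*(1 - man*y) roughly doubles the number of
* correct bits: ~12 from spu_re, then ~24, ~48, and finally full double
* precision. */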
y3 = spu_mul( y3, nexp );
// Choose iterated result or special value.
zero = spu_and( x, (vec_double2)signmask );
inf = spu_sel( (vec_double2)expmask, x, signmask );
result = spu_sel( y3, zero, _isinfd2 ( x ) );
result = spu_sel( result, inf, _is0denormd2 ( x ) );
result = spu_sel( result, x, _isnand2( x ) );
return result;
}
#endif

View File

@@ -0,0 +1,50 @@
/* recipf4 - for each of four float slots, compute reciprocal.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_RECIPF4_H___
#define ___SIMD_MATH_RECIPF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
static inline vector float
_recipf4 (vector float x)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// A constant of 1.0 + 1 ulp in the Newton-Raphson step results in exact
// answers for powers of 2, and a slightly smaller relative error bound.
vec_float4 y0;
vec_float4 oneish = (vec_float4)spu_splats(0x3f800001);
y0 = spu_re( x );
return spu_madd( spu_nmsub( x, y0, oneish ), y0, y0 );
}
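/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): with the "oneish" constant, exact powers of two
* reciprocate exactly.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector float _recipf4_example(void)
{
// Yields {0.25f, 2.0f, ~0.333333f, 0.125f}.
return _recipf4((vector float){4.0f, 0.5f, 3.0f, 8.0f});
}
#endif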
#endif

View File

@@ -0,0 +1,110 @@
/* A vector double is returned that contains the remainder xi REM yi,
for the corresponding elements of vector double x and vector double y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_REMAINDERD2_H___
#define ___SIMD_MATH_REMAINDERD2_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/_remainder.h>
#include <simdmath/fmodd2.h>
static inline vector double
_remainderd2(vector double x, vector double yy)
{
vec_uchar16 splat_hi = ((vec_uchar16){ 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11});
vec_uint4 y_hi;
vec_uint4 abs_x, abs_yy, abs_2x, abs_2y;
vec_uint4 bias;
vec_uint4 nan_out, overflow;
vec_uint4 result;
vec_uint4 half_smax = spu_splats((unsigned int)0x7FEFFFFF);
vec_uint4 sign_mask = (vec_uint4)(spu_splats(0x8000000000000000ULL));
vec_uint4 exp_mask = (vec_uint4)(spu_splats(0x7FF0000000000000ULL));
vec_uint4 val_nan = (vec_uint4)(spu_splats(0x7FF8000000000000ULL));
vec_uint4 vec_zero = spu_splats((unsigned int)0);
vec_uint4 is_zeroy;
// cut sign
abs_x = spu_andc((vec_uint4)x, sign_mask);
abs_yy = spu_andc((vec_uint4)yy, sign_mask);
y_hi = spu_shuffle(abs_yy, abs_yy, splat_hi);
// check nan out
is_zeroy = spu_cmpeq(abs_yy, vec_zero);
is_zeroy = spu_and(is_zeroy, spu_rlqwbyte(is_zeroy, 4));
nan_out = __vec_gt64_half(abs_yy, exp_mask); // y > 7FF00000
nan_out = spu_or(nan_out, spu_cmpgt(abs_x, half_smax)); // x >= 7FF0000000000000
nan_out = spu_or(nan_out, is_zeroy); // y = 0
nan_out = spu_shuffle(nan_out, nan_out, splat_hi);
// make y x2
abs_2y = __rem_twice_d(abs_yy); // 2 x y
result = (vec_uint4)_fmodd2((vec_double2)abs_x, (vec_double2)abs_2y);
// abs_x = spu_sel(spu_andc(result, sign_mask), abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FBFFFFF)));
abs_x = spu_sel(result, abs_x, spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF)));
/* if (2*x > y)
* x -= y
* if (2*x >= y) x -= y
*/
overflow = spu_cmpgt(y_hi, spu_splats((unsigned int)0x7FEFFFFF));
// make x2
abs_2x = __rem_twice_d(abs_x); // 2 x x
bias = __vec_gt64(abs_2x, abs_yy); // abs_2x > abs_yy
bias = spu_andc(bias, overflow);
abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias);
overflow = spu_or(overflow, spu_shuffle(spu_rlmaska(abs_x, -31), vec_zero, splat_hi)); // negative
// make x2
abs_2x = __rem_twice_d(spu_andc(abs_x, sign_mask)); // 2 * x (doubling helper does not handle negative values)
bias = spu_andc(bias, spu_rlmaska(__rem_sub_d(abs_2x, abs_yy), -31));
bias = spu_andc(spu_shuffle(bias, bias, splat_hi), overflow);
abs_x = spu_sel(abs_x, __rem_sub_d(abs_x, abs_yy), bias);
/* select final answer
*/
result = spu_xor(abs_x, spu_and((vec_uint4)x, sign_mask)); // set sign
result = spu_sel(result, val_nan, nan_out); // if nan
return ((vec_double2)result);
}
#endif

View File

@@ -0,0 +1,115 @@
/* remainderf4 - for each of four float slots, compute remainder of x/y defined as x - nearest_integer(x/y) * y.
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_REMAINDERF4_H___
#define ___SIMD_MATH_REMAINDERF4_H___
#include <simdmath.h>
#include <spu_intrinsics.h>
#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
static inline vector float
_remainderf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2, yabshalf;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, i, rem;
vec_uint4 inrange, odd0, odd1, odd2, cmp1, cmp2, odd;
// Find i = truncated_integer(|x/y|)
// By the error bounds of divf4, if |x/y| is < 2^20, the quotient is at most off by 1.0.
// Thus the exact truncation is either the truncated quotient, one less, or one greater.
q = _divf4( x, y );
xabs = _fabsf4( x );
yabs = _fabsf4( y );
qabs = _fabsf4( q );
xabs2 = spu_add( xabs, xabs );
inrange = spu_cmpabsgt( (vec_float4)spu_splats(0x49800000), q );
inrange = spu_and( inrange, spu_cmpabsgt( (vec_float4)spu_splats(0x7f800000), x ) );
qi1 = spu_convts( qabs, 0 );
qi0 = spu_add( qi1, -1 );
qi2 = spu_add( qi1, 1 );
odd1 = spu_cmpeq( spu_and( qi1, 1 ), 1 );
odd0 = odd2 = spu_nor( odd1, odd1 );
i0 = spu_convtf( qi0, 0 );
i1 = spu_convtf( qi1, 0 );
i2 = spu_convtf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0. Can test instead as
// 2*|x| - i*|y| >= |x|:
//
// With exact inputs, the negative-multiply-subtract gives the exact result rounded towards zero.
// Thus |x| - i*|y| may be < 0 but still round to zero. However, if 2*|x| - i*|y| < |x|, the computed
// answer will be rounded down to < |x|. 2*|x| can be represented exactly provided |x| < 2^128.
cmp1 = spu_cmpgt( xabs, spu_nmsub( i1, yabs, xabs2 ) );
cmp2 = spu_cmpgt( xabs, spu_nmsub( i2, yabs, xabs2 ) );
i = i0;
i = spu_sel( i1, i, cmp1 );
i = spu_sel( i2, i, cmp2 );
odd = odd0;
odd = spu_sel( odd1, odd, cmp1 );
odd = spu_sel( odd2, odd, cmp2 );
rem = spu_nmsub( i, yabs, xabs );
// Test whether i or i+1 = nearest_integer(|x/y|)
//
// i+1 is correct if:
//
// rem > 0.5*|y|
// or
// rem = 0.5*|y| and i is odd
yabshalf = spu_mul( yabs, spu_splats(0.5f) );
cmp1 = spu_cmpgt( rem, yabshalf );
cmp2 = spu_and( spu_cmpeq( rem, yabshalf ), odd );
i = spu_sel( i, spu_add( i, spu_splats(1.0f) ), spu_or( cmp1, cmp2 ) );
i = _copysignf4( i, q );
return spu_sel( spu_splats(0.0f), spu_nmsub( i, y, x ), inrange );
}
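/* Illustrative usage sketch (not part of the original library; the guard
* macro is hypothetical): the quotient is rounded to the nearest integer
* with ties to even, so results can differ from fmod.
*/
#ifdef __SIMD_MATH_EXAMPLES__
static inline vector float _remainderf4_example(void)
{
// 5/2 = 2.5 rounds to 2 (ties to even), so each slot is 5 - 2*2 = 1.0f.
return _remainderf4(spu_splats(5.0f), spu_splats(2.0f));
}
#endif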
#endif

Some files were not shown because too many files have changed in this diff.