added updated version of simdmathlibrary-1.0.1

This commit is contained in:
ejcoumans
2007-07-27 18:53:58 +00:00
parent fddd6c5721
commit f360dd27d6
377 changed files with 9928 additions and 6136 deletions

View File

@@ -31,6 +31,7 @@
# All that you do to add a file is edit OBJS, the rest will just work
prefix = /usr
prefix_ppu = $(prefix)
DESTDIR =
OBJS = fabsf4.o absi4.o truncf4.o sqrtf4.o tanf4.o \
@@ -43,10 +44,10 @@ OBJS = fabsf4.o absi4.o truncf4.o sqrtf4.o tanf4.o \
fmodf4.o negatei4.o exp2f4.o powf4.o atanf4.o \
atan2f4.o acosf4.o
INCLUDES_PPU = -I../
INCLUDES_PPU = -I. -I../common
ARCH_PPU = 64
CROSS_PPU = ppu-
ARCH_PPU = 32
CROSS_PPU =
AR_PPU = $(CROSS_PPU)ar
CC_PPU = $(CROSS_PPU)gcc
CXX_PPU = $(CROSS_PPU)g++
@@ -61,6 +62,7 @@ INSTALL = install
MAKE_DEFS = \
prefix='$(prefix)' \
prefix_ppu='$(prefix_ppu)' \
DESTDIR='$(DESTDIR)' \
LIB_BASE='$(LIB_BASE)' \
LIB_NAME='$(LIB_NAME)' \
@@ -78,13 +80,15 @@ MAKE_DEFS = \
LIB_MAJOR_VERSION = 1
LIB_MINOR_VERSION = 0
LIB_RELEASE = 1
LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE)
LIB_BASE = simdmath
LIB_NAME = lib$(LIB_BASE)
STATIC_LIB = $(LIB_NAME).a
SHARED_LIB = $(LIB_NAME).so
SHARED_LIB_SONAME = $(SHARED_LIB).$(LIB_MAJOR_VERSION)
SHARED_LIB_FULL = $(SHARED_LIB).$(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION)
SHARED_LIB_FULL = $(SHARED_LIB).$(LIB_FULL_VERSION)
ALL_LIBS = $(STATIC_LIB) $(SHARED_LIB) $(SHARED_LIB_FULL) $(SHARED_LIB_SONAME)
@@ -98,50 +102,36 @@ $(STATIC_LIB): $(OBJS)
$(AR_PPU) cr $@ $(OBJS)
$(RANLIB_PPU) $@
$(SHARED_LIB): $(OBJS)
$(SHARED_LIB_FULL): $(OBJS)
$(CC_PPU) -shared $(OBJS) -o $@ $(LDFLAGS_PPU) -Wl,-h,$(SHARED_LIB_SONAME)
$(SHARED_LIB_SONAME) $(SHARED_LIB_FULL): $(SHARED_LIB)
ln -fs $(SHARED_LIB) $@
$(SHARED_LIB) $(SHARED_LIB_SONAME): $(SHARED_LIB_FULL)
ln -fs $(SHARED_LIB_FULL) $@
install: $(ALL_LIBS)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include
$(INSTALL) -m 644 ../simdmath.h $(DESTDIR)$(prefix)/include/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/lib
$(INSTALL) -m 644 $(STATIC_LIB) $(DESTDIR)$(prefix)/lib/$(STATIC_LIB)
$(INSTALL) -m 755 $(SHARED_LIB) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_FULL)
ln -fs $(SHARED_LIB_FULL) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB_SONAME)
ln -fs $(SHARED_LIB_SONAME) $(DESTDIR)$(prefix)/lib/$(SHARED_LIB)
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/include
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/include/simdmath
$(INSTALL) -m 644 simdmath/*.h $(DESTDIR)$(prefix_ppu)/include/simdmath/
$(INSTALL) -m 755 -d $(DESTDIR)$(prefix_ppu)/lib
$(INSTALL) -m 644 $(STATIC_LIB) $(DESTDIR)$(prefix_ppu)/lib/$(STATIC_LIB)
$(INSTALL) -m 755 $(SHARED_LIB_FULL) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB_FULL)
ln -fs $(SHARED_LIB_FULL) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB_SONAME)
ln -fs $(SHARED_LIB_SONAME) $(DESTDIR)$(prefix_ppu)/lib/$(SHARED_LIB)
clean:
cd tests; $(MAKE) $(MAKE_DEFS) clean
rm -f $(OBJS)
rm -f $(STATIC_LIB) $(SHARED_LIB) $(SHARED_LIB_SONAME) $(SHARED_LIB_FULL)
$(OBJS): ../simdmath.h common-types.h
$(OBJS): ../common/simdmath.h simdmath/_vec_utils.h
check: $(ALL_LIBS)
cd tests; $(MAKE) $(MAKE_DEFS) all; $(MAKE) $(MAKE_DEFS) check
# Some Objects have special header files.
sinf4.o cosf4.o sincosf4.o tanf4.o: sincos_c.h
sinf4.o cosf4.o sincosf4.o tanf4.o: ../common/simdmath/_sincos.h
%.o: %.c
%.o: ../common/%.c simdmath/%.h
$(CC_PPU) $(CFLAGS_PPU) -c $<
#----------
# C++
#----------
%.o: %.C
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cpp
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cc
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cxx
$(CXX_PPU) $(CFLAGS_PPU) -c $<

View File

@@ -1,83 +0,0 @@
/* atanf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
//
// atanf4 - inverse tangent of each of the four slots of x.
//
// Slots whose magnitude exceeds 1.0 are evaluated at -1/x and offset by
// pi/2 carrying the sign of x; every other slot is evaluated at x itself
// with a zero offset.  The result is the offset plus an odd polynomial in
// the (possibly replaced) argument.
//
vector float
atanf4 (vector float x)
{
  vec_uint4  signbit = vec_splatsu4(0x80000000);
  vec_float4 zero    = vec_splatsf4(0.0f);

  // Separate the sign bit of the input from its magnitude.
  vec_uint4  x_sign = vec_and((vec_uint4)x, signbit);
  vec_float4 x_mag  = (vec_float4)vec_andc((vec_uint4)x, signbit);

  // Negated reciprocal, substituted for x wherever |x| > 1.
  vec_float4 neg_recip = (vec_float4)vec_xor((vec_uint4)recipf4(x), signbit);
  vec_uint4  big       = (vec_uint4)vec_cmpgt(x_mag, vec_splatsf4(1.0f));

  // pi/2 with the sign of x in the "big" slots, all-zero bits elsewhere.
  vec_float4 offset = (vec_float4)vec_or(x_sign,
                                         (vec_uint4)(vec_splatsf4(1.57079632679489661923f)));
  offset = (vec_float4)vec_and((vec_uint4)offset, big);

  x = vec_sel(x, neg_recip, big);

  /* The instruction count could be reduced by computing the polynomial
   * entirely from nested (dependent) fma's.  To reduce the number of
   * pipeline stalls, the polynomial is instead evaluated in two halves
   * (upper and lower) that are combined at the end.
   */
  offset = vec_add(offset, x);

  // Powers of the argument needed by the two halves.
  vec_float4 x2 = vec_madd(x,  x,  zero);
  vec_float4 x3 = vec_madd(x2, x,  zero);
  vec_float4 x4 = vec_madd(x2, x2, zero);
  vec_float4 x8 = vec_madd(x4, x4, zero);
  vec_float4 x9 = vec_madd(x8, x,  zero);

  // Upper half: coefficients of the terms that are scaled by x^9.
  vec_float4 upper = vec_madd(vec_splatsf4(0.0028662257), x2, vec_splatsf4(-0.0161657367));
  upper = vec_madd(upper, x2, vec_splatsf4(0.0429096138));
  upper = vec_madd(upper, x2, vec_splatsf4(-0.0752896400));
  upper = vec_madd(upper, x2, vec_splatsf4(0.1065626393));

  // Lower half: terms scaled by x^3, folded together with offset + x.
  vec_float4 lower = vec_madd(vec_splatsf4(-0.1420889944), x2, vec_splatsf4(0.1999355085));
  lower = vec_madd(lower, x2, vec_splatsf4(-0.3333314528));
  lower = vec_madd(lower, x3, offset);

  return vec_madd(upper, x9, lower);
}

View File

@@ -1,103 +0,0 @@
/* cbrtf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
/*
 * __calcQuot(xexp) - statement macro that derives a per-slot quotient
 * from the integer vector xexp and stores it in `quot`.
 *
 * The macro is NOT self-contained: the expansion site must already have
 * `vec_int4 n` and `vec_int4 quot` declared, and the expansion itself
 * declares `negxexpmask` and `r` in the enclosing scope, so it can be
 * expanded at most once per scope.
 *
 * Steps, as written below:
 *   - n = xexp, with 2 added in the slots where n is negative;
 *   - quot = (n >> 2) + (n >> 4), then refined with quot += quot >> 4,
 *     quot >> 8 and quot >> 16 (arithmetic shifts);
 *   - r = n - quot - (quot << 1), i.e. n - 3 * quot;
 *   - quot += (r + 5 + (r << 2)) >> 4.
 * This appears to be a shift-and-add division by 3; the caller uses quot
 * as the quotient in "xexp - 3 * quot".
 *
 * The last line of the definition deliberately carries no trailing
 * backslash, so that whatever line follows the macro is never spliced
 * into its replacement list.
 */
#define __calcQuot(xexp) n = xexp; \
  vec_uint4 negxexpmask = (vec_uint4)vec_cmpgt( ((vec_int4){0, 0, 0, 0}) , n); \
  n = vec_sel(n, vec_add(n, ((vec_int4){2, 2, 2, 2}) ), negxexpmask); \
  quot = vec_add(vec_sra(n, ((vec_uint4){2, 2, 2, 2}) ), vec_sra(n, ((vec_uint4){4, 4, 4, 4}) )); \
  quot = vec_add(quot, vec_sra(quot, ((vec_uint4){4, 4, 4, 4}) )); \
  quot = vec_add(quot, vec_sra(quot, ((vec_uint4){8, 8, 8, 8}) )); \
  quot = vec_add(quot, vec_sra(quot, ((vec_uint4){16, 16, 16, 16}) )); \
  vec_int4 r = vec_sub(vec_sub(n,quot), vec_sl(quot, ((vec_uint4){1, 1, 1, 1}) )); \
  quot = vec_add( \
    quot, \
    vec_sra( \
      vec_add( \
        vec_add(r,((vec_int4){5, 5, 5, 5})), \
        vec_sl (r,((vec_uint4){2, 2, 2, 2})) \
      ), \
      ((vec_uint4){4, 4, 4, 4}) \
    ) \
  );
#define _CBRTF_H_cbrt2 1.2599210498948731648 // 2^(1/3)
#define _CBRTF_H_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
//
// cbrtf4 - cube root of each of the four slots of x.
//
// The sign is stripped, the magnitude is split by frexpf4 into a fraction
// and an integer exponent, a cube-root estimate of the fraction is refined
// and scaled, and the sign bit is put back on slots that were negative.
//
vector float
cbrtf4 (vector float x)
{
vec_float4 zeros = (vec_float4){0.0f, 0.0f, 0.0f, 0.0f};
// n is not used directly here; it is the scratch variable that the
// __calcQuot macro assigns to.
vec_int4 xexp, n;
vec_float4 sgnmask = (vec_float4)(vec_splatsi4(0x80000000));
// Remember which slots are negative, then work on |x|.
vec_uint4 negmask = (vec_uint4)vec_cmpgt(zeros, x);
x = vec_andc(x, sgnmask);
// x becomes the fraction returned by frexpf4; the exponent lands in xexp.
x = frexpf4(x, &xexp);
// Quadratic in the fraction:
//   p = (-0.1915... * x + 0.6975...) * x + 0.4926...
// NOTE(review): presumably the initial cube-root estimate - confirm.
vec_float4 p = vec_madd(
vec_madd(x, vec_splatsf4(-0.191502161678719066f), vec_splatsf4(0.697570460207922770f)),
x,
vec_splatsf4(0.492659620528969547f)
);
// p3 = p * p * p
vec_float4 p3 = vec_madd(p, vec_madd(p, p, zeros), zeros);
// quot must be declared before the macro: __calcQuot assigns to it (and to
// n) and also declares negxexpmask and r in this scope.
vec_int4 quot;
__calcQuot(xexp);
vec_int4 modval = vec_sub(vec_sub(xexp,quot), vec_sl(quot,vec_splatsu4(1))); // mod = xexp - 3*quotient
// Pick the scale factor from modval: -1 -> 1/2^(1/3), 0 -> 1, 1 -> 2^(1/3),
// 2 -> 2^(2/3); any other value keeps the initial 1/2^(2/3).
vec_float4 factor = vec_splatsf4(1.0/_CBRTF_H_sqr_cbrt2);
factor = vec_sel(factor, vec_splatsf4(1.0/_CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4(-1)));
factor = vec_sel(factor, vec_splatsf4( 1.0), vec_cmpeq(modval,vec_splatsi4( 0)));
factor = vec_sel(factor, vec_splatsf4( _CBRTF_H_cbrt2), vec_cmpeq(modval,vec_splatsi4( 1)));
factor = vec_sel(factor, vec_splatsf4(_CBRTF_H_sqr_cbrt2), vec_cmpeq(modval,vec_splatsi4( 2)));
// res = (p * factor) * (2*x + p^3) / (2*p^3 + x)
// NOTE(review): looks like a single refinement step of the estimate p - confirm.
vec_float4 pre = vec_madd(p, factor, zeros);
vec_float4 numr = vec_madd(x , vec_splatsf4(2.0f), p3);
vec_float4 denr = vec_madd(p3, vec_splatsf4(2.0f), x );
vec_float4 res = vec_madd(pre, divf4(numr, denr), zeros);
// Apply the exponent quotient computed by __calcQuot.
res = ldexpf4(res, quot);
// Set the sign bit again on the slots that compared less than zero.
return vec_sel(res, vec_or(res,sgnmask), negmask);
}
/*
_FUNC_DEF(vec_float4, cbrtf4, (vec_float4 x))
{
vec_uint4 neg = (vec_uint4)vec_cmpgt((vec_float4)(0.0f), x);
vec_float4 sbit = (vec_float4)(vec_float4)((int)0x80000000);
vec_float4 absx = vec_andc(x, sbit);
vec_float4 res = exp2f4(vec_mul((vec_float4)(0.3333333333333f), log2f4(absx)));
res = vec_sel(res, vec_or(sbit, res), neg);
return res;
}
*/

View File

@@ -1,104 +0,0 @@
/* cosf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "sincos_c.h"
#include "common-types.h"
//
// cosf4 - cosine of each of the four slots of x.
//
// The angle is reduced to a remainder around the nearest multiple of pi/2,
// sine and cosine polynomials are both evaluated on that remainder, and
// the quadrant index decides which polynomial is used and whether its sign
// is flipped.
//
vector float
cosf4 (vector float x)
{
  vec_float4 zero = vec_splatsf4(0.0f);

  // Range reduction: scale the angle by 2/pi.
  vec_float4 scaled = vec_madd(x, vec_splatsf4(0.63661977236f), zero);

  // Quadrant index: add 0.5 carrying the sign bit of the scaled angle,
  // then convert to a signed integer with vec_cts.
  vec_float4 half = vec_sel(vec_splatsf4(0.5f), scaled, vec_splatsu4(0x80000000));
  vec_int4   quad = vec_cts(vec_add(scaled, half), 0);

  // offset = 1 + (quad & 3), so it ranges over 1..4.
  vec_int4 offset = vec_add(vec_splatsi4(1), vec_and(quad, vec_splatsi4((int)0x3)));

  // Remainder in range [-pi/4..pi/4]: subtract quad * _SINCOS_KC1 and then
  // quad * _SINCOS_KC2 from the original angle.
  // NOTE(review): KC1/KC2 are presumably a two-part split of pi/2 - confirm
  // against sincos_c.h.
  vec_float4 quad_f  = vec_ctf(quad, 0);
  vec_float4 partial = vec_nmsub(quad_f, vec_splatsf4(_SINCOS_KC1), x);
  vec_float4 r       = vec_nmsub(quad_f, vec_splatsf4(_SINCOS_KC2), partial);

  // Square and cube of the remainder.
  vec_float4 r2 = vec_madd(r,  r, zero);
  vec_float4 r3 = vec_madd(r2, r, zero);

  // Cosine polynomial: 1.0f + r2 * ((CC0 * r2 + CC1) * r2 + CC2)
  vec_float4 cos_r = vec_madd(vec_splatsf4(_SINCOS_CC0), r2, vec_splatsf4(_SINCOS_CC1));
  cos_r = vec_madd(cos_r, r2, vec_splatsf4(_SINCOS_CC2));
  cos_r = vec_madd(cos_r, r2, vec_splatsf4(1.0f));

  // Sine polynomial: r + r3 * ((SC0 * r2 + SC1) * r2 + SC2)
  vec_float4 sin_r = vec_madd(vec_splatsf4(_SINCOS_SC0), r2, vec_splatsf4(_SINCOS_SC1));
  sin_r = vec_madd(sin_r, r2, vec_splatsf4(_SINCOS_SC2));
  sin_r = vec_madd(sin_r, r3, r);

  // Take the sine polynomial where offset is even, the cosine polynomial
  // where it is odd.
  vec_uint4 pick_sin = (vec_uint4)vec_cmpeq(vec_and(offset, vec_splatsi4(0x1)),
                                            vec_splatsi4(0));
  vec_float4 picked = vec_sel(cos_r, sin_r, pick_sin);

  // Flip the sign of the result where bit 1 of offset is set
  // (offset is 2 or 3); keep it unchanged elsewhere.
  vec_uint4 keep_sign = (vec_uint4)vec_cmpeq(vec_and(offset, vec_splatsi4(0x2)),
                                             vec_splatsi4((int)0));
  vec_float4 negated = (vec_float4)vec_xor(vec_splatsu4(0x80000000U), (vec_uint4)picked);

  return vec_sel(negated, picked, keep_sign);
}

View File

@@ -1,82 +0,0 @@
/* log10f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#define _LOG10F_H_loga2msb ((float) 0.3010299205780f)
#define _LOG10F_H_loga2lsb ((float) 7.5085978266e-8f)
#define _LOG10F_H_logaemsb ((float) 0.4342944622040f)
#define _LOG10F_H_logaelsb ((float) 1.9699272335e-8f)
#define _LOG10F_H_neglogae ((float)-0.4342944819033f)
#define _LOG10F_H_c0 ((float)(-0.2988439998f))
#define _LOG10F_H_c1 ((float)(-0.3997655209f))
#define _LOG10F_H_c2 ((float)(-0.6666679125f))
//
// log10f4 - base-10 logarithm of each of the four slots of x.
//
// The exponent field is pulled out of the float bit pattern, the remaining
// bits are rebuilt as a value m with the exponent of 0.5, and the result is
// assembled from the exponent term plus a polynomial in
// z = (m - 1) / (m + 1), using split (msb/lsb) constants.
// No special-case handling of particular inputs is performed here.
//
vector float
log10f4 (vector float x)
{
  vec_float4 fzero     = vec_splatsf4(0.0f);
  vec_float4 one       = vec_splatsf4(1.0f);
  vec_int4   exp_field = vec_splatsi4(0x7F800000);

  // Exponent: isolate the exponent bits, shift them down by 23 and add -126.
  vec_int4 raw_exp = vec_sr(vec_and((vec_int4)x, exp_field), vec_splatsu4(23));
  vec_int4 e       = vec_add(raw_exp, vec_splatsi4(-126));

  // Replace the exponent bits of x with those of 0x3F000000 (0.5f); the
  // sign and mantissa bits of x are left in place.
  vec_float4 m = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)exp_field);

  // Where m is below 0.7071..., double m and take one off the exponent.
  vec_uint4 below = (vec_uint4)vec_cmpgt(vec_splatsf4((float)0.7071067811865f), m);
  m = vec_sel(m, vec_add(m, m), below);
  e = vec_sel(e, vec_sub(e, vec_splatsi4(1)), below);

  // z = (m - 1) / (m + 1), w = z * z
  vec_float4 m1 = vec_sub(m, one);
  vec_float4 z  = divf4(m1, vec_add(m, one));
  vec_float4 w  = vec_madd(z, z, fzero);

  // y = z * (((c0 * w + c1) * w + c2) * w + m1)
  vec_float4 poly = vec_madd(vec_splatsf4(_LOG10F_H_c0), w, vec_splatsf4(_LOG10F_H_c1));
  poly = vec_madd(poly, w, vec_splatsf4(_LOG10F_H_c2));
  vec_float4 y = vec_madd(z, vec_madd(poly, w, m1), fzero);

  vec_float4 ef = vec_ctf(e, 0);

  // High part: logaemsb * m1 + loga2msb * e
  vec_float4 hi = vec_madd(vec_splatsf4(_LOG10F_H_logaemsb), m1,
                           vec_madd(vec_splatsf4(_LOG10F_H_loga2msb), ef, fzero));

  // Low part: logaelsb * m1 + loga2lsb * e + neglogae * y
  vec_float4 lo = vec_madd(vec_splatsf4(_LOG10F_H_neglogae), y, fzero);
  lo = vec_madd(vec_splatsf4(_LOG10F_H_loga2lsb), ef, lo);
  lo = vec_madd(vec_splatsf4(_LOG10F_H_logaelsb), m1, lo);

  return vec_add(hi, lo);
}

View File

@@ -1,74 +0,0 @@
/* powf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include <math.h>
#include "common-types.h"
//
// powf4 - x raised to the power y, slot by slot.
//
// Evaluated as exp2f4(y * log2f4(|x|)).  The sign bit of the result is set
// in slots where x is below zero and the unsigned-integer conversion of |y|
// is odd.  Slots where x compares equal to zero return zero, whatever the
// value of y.
//
vector float
powf4 (vector float x, vector float y)
{
  vec_float4 fzero    = vec_splatsf4(0.0f);
  vec_float4 signmask = (vec_float4)(vec_splatsi4(0x80000000));

  // Classify the base before its sign is stripped.
  vec_uint4 x_is_zero = (vec_uint4)vec_cmpeq(fzero, x);
  vec_uint4 x_is_neg  = (vec_uint4)vec_cmpgt(fzero, x);

  vec_float4 mag_x = vec_andc(x, signmask);
  vec_float4 mag_y = vec_andc(y, signmask);

  // Lowest bit of |y| converted to an unsigned integer: set when it is odd.
  vec_uint4 low_bit  = vec_and(vec_ctu(mag_y, 0), vec_splatsu4(0x00000001));
  vec_uint4 y_is_odd = (vec_uint4)vec_cmpgt(low_bit, vec_splatsu4(0));
  vec_uint4 negate   = vec_and(x_is_neg, y_is_odd);

  vec_float4 power = exp2f4(vec_madd(y, log2f4(mag_x), fzero));
  power = vec_sel(power, vec_or(signmask, power), negate);

  return vec_sel(power, fzero, x_is_zero);
}
/*
{
vec_int4 zeros = vec_splats(0);
vec_int4 ones = (vec_int4)vec_splats((char)0xFF);
vec_uint4 zeromask = (vec_uint4)vec_cmpeq((vec_float4)zeros, x);
vec_uint4 onemask = (vec_uint4)vec_cmpeq((vec_float4)ones , y);
vec_uint4 negmask = (vec_uint4)vec_cmpgt(vec_splats(0.0f), x);
vec_float4 sbit = (vec_float4)(vec_int4)(0x80000000);
vec_float4 absx = vec_andc(x, sbit);
vec_float4 absy = vec_andc(y, sbit);
vec_uint4 oddy = vec_and(vec_convtu(absy, 0), (vec_uint4)vec_splats(0x00000001));
negmask = vec_and(negmask, (vec_uint4)vec_cmpgt(oddy, (vec_uint4)zeros));
}
*/

View File

@@ -27,26 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___COMMON_TYPES_H___
#define ___COMMON_TYPES_H___
#ifndef ___SIMD_MATH_VEC_UTILS_H___
#define ___SIMD_MATH_VEC_UTILS_H___
typedef vector float vec_float4;
typedef vector signed int vec_int4;
typedef vector unsigned int vec_uint4;
static inline vec_float4 vec_splatsf4(const float x)
static inline vector float
__vec_splatsf4(const float x)
{
return (vec_float4) {x, x, x, x};
return (vector float) {x, x, x, x};
}
static inline vec_int4 vec_splatsi4(const signed int x)
static inline vector signed int
__vec_splatsi4(const signed int x)
{
return (vec_int4) {x, x, x, x};
return (vector signed int) {x, x, x, x};
}
static inline vec_uint4 vec_splatsu4(const unsigned int x)
static inline vector unsigned int
__vec_splatsu4(const unsigned int x)
{
return (vec_uint4) {x, x, x, x};
return (vector unsigned int) {x, x, x, x};
}
#endif

View File

@@ -27,14 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ABSI4_H___
#define ___SIMD_MATH_ABSI4_H___
#include <simdmath.h>
#include <altivec.h>
vector signed int
absi4 (vector signed int x)
static inline vector signed int
_absi4 (vector signed int x)
{
return vec_abs( x );
return vec_abs( x );
}
#endif

View File

@@ -27,53 +27,56 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ACOSF4_H___
#define ___SIMD_MATH_ACOSF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/sqrtf4.h>
//
// Computes the inverse cosine of all four slots of x.
//
vector float
acosf4 (vector float x)
static inline vector float
_acosf4 (vector float x)
{
vec_float4 result, xabs;
vec_float4 t1;
vec_float4 xabs2, xabs4;
vec_float4 hi, lo;
vec_float4 neg, pos;
vec_uint4 select;
vector float result, xabs;
vector float t1;
vector float xabs2, xabs4;
vector float hi, lo;
vector float neg, pos;
vector unsigned int select;
xabs = vec_abs(x);
select = (vec_uint4)(vec_sra((vec_int4)(x), ((vec_uint4){31, 31, 31, 31}) ));
xabs = vec_abs(x);
select = (vector unsigned int)(vec_sra((vector signed int)(x), __vec_splatsu4(31)));
t1 = sqrtf4(vec_sub( ((vec_float4){1.0, 1.0, 1.0, 1.0}) , xabs));
t1 = _sqrtf4(vec_sub(__vec_splatsf4(1.0f), xabs));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = vec_madd(xabs, xabs, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) );
xabs4 = vec_madd(xabs2, xabs2, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) );
hi = vec_madd( ((vec_float4){-0.0012624911, -0.0012624911, -0.0012624911, -0.0012624911}) , xabs, ((vec_float4){0.0066700901, 0.0066700901, 0.0066700901, 0.0066700901}) );
hi = vec_madd(hi, xabs, vec_splatsf4(-0.0170881256));
hi = vec_madd(hi, xabs, vec_splatsf4( 0.0308918810));
lo = vec_madd(vec_splatsf4(-0.0501743046), xabs, vec_splatsf4(0.0889789874));
lo = vec_madd(lo, xabs, vec_splatsf4(-0.2145988016));
lo = vec_madd(lo, xabs, vec_splatsf4( 1.5707963050));
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
xabs2 = vec_madd(xabs, xabs, __vec_splatsf4(0.0f) );
xabs4 = vec_madd(xabs2, xabs2, __vec_splatsf4(0.0f) );
hi = vec_madd(__vec_splatsf4(-0.0012624911) , xabs, __vec_splatsf4(0.0066700901));
hi = vec_madd(hi, xabs, __vec_splatsf4(-0.0170881256));
hi = vec_madd(hi, xabs, __vec_splatsf4( 0.0308918810));
lo = vec_madd(__vec_splatsf4(-0.0501743046), xabs, __vec_splatsf4(0.0889789874));
lo = vec_madd(lo, xabs, __vec_splatsf4(-0.2145988016));
lo = vec_madd(lo, xabs, __vec_splatsf4( 1.5707963050));
result = vec_madd(hi, xabs4, lo);
result = vec_madd(hi, xabs4, lo);
/* Adjust the result if x is negative.
*/
neg = vec_nmsub(t1, result, vec_splatsf4(3.1415926535898f));
pos = vec_madd(t1, result, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) );
/* Adjust the result if x is negative.
*/
neg = vec_nmsub(t1, result, __vec_splatsf4(3.1415926535898f));
pos = vec_madd(t1, result, __vec_splatsf4(0.0f));
result = vec_sel(pos, neg, select);
result = vec_sel(pos, neg, select);
return result;
return result;
}
#endif

View File

@@ -27,60 +27,69 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ASINF4_H___
#define ___SIMD_MATH_ASINF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/divf4.h>
#include <simdmath/sqrtf4.h>
vector float asinf4 (vector float x)
static inline vector float
_asinf4 (vector float x)
{
// positive = (x > 0)
//
vec_uint4 positive = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.0f));
// positive = (x > 0)
//
vector unsigned int positive = (vector unsigned int)vec_cmpgt(x, __vec_splatsf4(0.0f));
// x = absf(x)
//
x = vec_abs(x);
// x = absf(x)
//
x = vec_abs(x);
// gtHalf = (|x| > 0.5)
//
vec_uint4 gtHalf = (vec_uint4)vec_cmpgt(x,vec_splatsf4(0.5f));
// gtHalf = (|x| > 0.5)
//
vector unsigned int gtHalf = (vector unsigned int)vec_cmpgt(x, __vec_splatsf4(0.5f));
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vec_float4 g = vec_sel(vec_madd(x,x,vec_splatsf4(0.0f)),vec_madd(vec_splatsf4(-0.5f),x,vec_splatsf4(0.5f)),gtHalf);
x = vec_sel(x,vec_madd(vec_splatsf4(-2.0f),sqrtf4(g),vec_splatsf4(0.0f)),gtHalf);
// if (x > 0.5)
// g = 0.5 - 0.5*x
// x = -2 * sqrtf(g)
// else
// g = x * x
//
vector float g =
vec_sel(vec_madd(x, x, __vec_splatsf4(0.0f)),
vec_madd(__vec_splatsf4(-0.5f), x, __vec_splatsf4(0.5f)), gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vec_float4 denom = vec_add(g,vec_splatsf4(-5.54846723f));
vec_float4 num = vec_madd(vec_splatsf4(-0.504400557f),g,vec_splatsf4(0.933933258f));
denom = vec_madd(denom,g,vec_splatsf4(5.603603363f));
num = vec_madd(vec_madd(x,g,vec_splatsf4(0.0f)),num,vec_splatsf4(0.0f));
x = vec_sel(x, vec_madd(__vec_splatsf4(-2.0f), _sqrtf4(g), __vec_splatsf4(0.0f)), gtHalf);
// Compute the polynomials and take their ratio
// denom = (1.0f*g + -0.554846723e+1f)*g + 5.603603363f
// num = x * g * (-0.504400557f * g + 0.933933258f)
//
vector float denom = vec_add(g, __vec_splatsf4(-5.54846723f));
vector float num = vec_madd(__vec_splatsf4(-0.504400557f), g, __vec_splatsf4(0.933933258f));
denom = vec_madd(denom, g, __vec_splatsf4(5.603603363f));
num = vec_madd(vec_madd(x, g, __vec_splatsf4(0.0f)), num, __vec_splatsf4(0.0f));
// x = x + num / denom
//
x = vec_add(x,divf4(num,denom));
// x = x + num / denom
//
x = vec_add(x,_divf4(num,denom));
// if (x > 0.5)
// x = x + M_PI_2
//
x = vec_sel(x,vec_add(x,vec_splatsf4(1.57079632679489661923f)),gtHalf);
// if (x > 0.5)
// x = x + M_PI_2
//
x = vec_sel(x,vec_add(x, __vec_splatsf4(1.57079632679489661923f)), gtHalf);
// if (!positive) x = -x
//
x = vec_sel((vec_float4)vec_xor(vec_splatsi4(0x80000000),(vec_int4)x),x,positive);
// if (!positive) x = -x
//
x = vec_sel((vector float)vec_xor(__vec_splatsi4(0x80000000), (vector signed int)x),
x, positive);
return x;
return x;
}
#endif

View File

@@ -27,35 +27,41 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATAN2F4_H___
#define ___SIMD_MATH_ATAN2F4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/atanf4.h>
//
// Inverse tangent function of two variables
//
vector float
atan2f4 (vector float y, vector float x)
static inline vector float
_atan2f4 (vector float y, vector float x)
{
vec_float4 res = atanf4(divf4(y,x));
vector float res = _atanf4(divf4(y,x));
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vec_uint4 yNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,y);
vec_uint4 xNeg = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) ,x);
// Use the arguments to determine the quadrant of the result:
// if (x < 0)
// if (y < 0)
// res = -PI + res
// else
// res = PI + res
//
vector unsigned int yNeg = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.0f), y);
vector unsigned int xNeg = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.0f) ,x);
vec_float4 bias = vec_sel(vec_splatsf4(3.14159265358979323846f),vec_splatsf4(-3.14159265358979323846f),yNeg);
vector float bias =
vec_sel(__vec_splatsf4(3.14159265358979323846f),
__vec_splatsf4(-3.14159265358979323846f), yNeg);
vec_float4 newRes = vec_add(bias, res);
vector float newRes = vec_add(bias, res);
res = vec_sel(res,newRes,xNeg);
res = vec_sel(res,newRes,xNeg);
return res;
return res;
}
#endif

View File

@@ -0,0 +1,87 @@
/* atanf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ATANF4_H___
#define ___SIMD_MATH_ATANF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/recipf4.h>
//
// _atanf4 - inverse tangent of each of the four slots of x.
//
// Slots whose magnitude exceeds 1.0 are evaluated at -1/x and offset by
// pi/2 carrying the sign of x; every other slot is evaluated at x itself
// with a zero offset.  The result is the offset plus an odd polynomial in
// the (possibly replaced) argument.
//
static inline vector float
_atanf4 (vector float x)
{
  vector unsigned int signbit = __vec_splatsu4(0x80000000);
  vector float        zero    = __vec_splatsf4(0.0f);

  // Separate the sign bit of the input from its magnitude.
  vector unsigned int x_sign = vec_and((vector unsigned int)x, signbit);
  vector float        x_mag  = (vector float)vec_andc((vector unsigned int)x, signbit);

  // Negated reciprocal, substituted for x wherever |x| > 1.
  vector float neg_recip =
    (vector float)vec_xor((vector unsigned int)_recipf4(x), signbit);
  vector unsigned int big =
    (vector unsigned int)vec_cmpgt(x_mag, __vec_splatsf4(1.0f));

  // pi/2 with the sign of x in the "big" slots, all-zero bits elsewhere.
  vector float offset =
    (vector float)vec_or(x_sign,
                         (vector unsigned int)(__vec_splatsf4(1.57079632679489661923f)));
  offset = (vector float)vec_and((vector unsigned int)offset, big);

  x = vec_sel(x, neg_recip, big);

  /* The instruction count could be reduced by computing the polynomial
   * entirely from nested (dependent) fma's.  To reduce the number of
   * pipeline stalls, the polynomial is instead evaluated in two halves
   * (upper and lower) that are combined at the end.
   */
  offset = vec_add(offset, x);

  // Powers of the argument needed by the two halves.
  vector float x2 = vec_madd(x,  x,  zero);
  vector float x3 = vec_madd(x2, x,  zero);
  vector float x4 = vec_madd(x2, x2, zero);
  vector float x8 = vec_madd(x4, x4, zero);
  vector float x9 = vec_madd(x8, x,  zero);

  // Upper half: coefficients of the terms that are scaled by x^9.
  vector float upper =
    vec_madd(__vec_splatsf4(0.0028662257), x2, __vec_splatsf4(-0.0161657367));
  upper = vec_madd(upper, x2, __vec_splatsf4(0.0429096138));
  upper = vec_madd(upper, x2, __vec_splatsf4(-0.0752896400));
  upper = vec_madd(upper, x2, __vec_splatsf4(0.1065626393));

  // Lower half: terms scaled by x^3, folded together with offset + x.
  vector float lower =
    vec_madd(__vec_splatsf4(-0.1420889944), x2, __vec_splatsf4(0.1999355085));
  lower = vec_madd(lower, x2, __vec_splatsf4(-0.3333314528));
  lower = vec_madd(lower, x3, offset);

  return vec_madd(upper, x9, lower);
}
#endif

View File

@@ -0,0 +1,97 @@
/* cbrtf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CBRTF4_H___
#define ___SIMD_MATH_CBRTF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/frexpf4.h>
#include <simdmath/ldexpf4.h>
#include <simdmath/divf4.h>
//
// Helper for _cbrtf4: per-slot integer quotient of n divided by 3, computed
// with shifts and adds only (no integer divide).
// NOTE(review): exactness and rounding direction for every possible n are
// not established here; _cbrtf4 handles remainders -1, 0, 1 and 2 explicitly
// and maps anything else to its default factor.
//
static inline vector signed int
__cbrtf4_calc_quot(vector signed int n)
{
vector signed int quot;
// Slots where n < 0 get 2 added before the shifts.
vector unsigned int negxexpmask = (vector unsigned int)vec_cmpgt(__vec_splatsi4(0), n);
n = vec_sel(n, vec_add(n, __vec_splatsi4(2)), negxexpmask);
// quot = n/4 + n/16, then repeatedly quot += quot >> k for k = 4, 8, 16:
// a shift-and-add estimate of n/3 (arithmetic shifts, so each step truncates).
quot = vec_add(vec_sra(n, __vec_splatsu4(2)), vec_sra(n, __vec_splatsu4(4)));
quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(4)));
quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(8)));
quot = vec_add(quot, vec_sra(quot, __vec_splatsu4(16)));
// r = n - 3*quot (3*quot formed as quot + (quot << 1)).
vector signed int r = vec_sub(vec_sub(n,quot), vec_sl(quot, __vec_splatsu4(1)));
// Correction term: quot += (5*r + 5) >> 4, with 5*r formed as r + (r << 2).
quot = vec_add(quot,
vec_sra(vec_add(vec_add(r, __vec_splatsi4(5)),
vec_sl (r, __vec_splatsu4(2))),
__vec_splatsu4(4)));
return quot;
}
#define __CBRTF_cbrt2 1.2599210498948731648 // 2^(1/3)
#define __CBRTF_sqr_cbrt2 1.5874010519681994748 // 2^(2/3)
//
// Computes the cube root of all four slots of x.
//
// The sign is stripped, |x| is split into mantissa and exponent with
// _frexpf4, a quadratic estimate of the mantissa's cube root is refined once,
// the exponent is divided by 3, and the leftover exponent (xexp - 3*quot) is
// applied as a constant factor. The sign is put back at the end.
//
static inline vector float
_cbrtf4 (vector float x)
{
vector float zeros = __vec_splatsf4(0.0f);
vector signed int xexp;
vector float sgnmask = (vector float)__vec_splatsi4(0x80000000);
// negmask marks slots where x < 0 (a compare, so -0.0 is not marked).
vector unsigned int negmask = (vector unsigned int)vec_cmpgt(zeros, x);
// Work on |x|: clear the sign bit, then split into mantissa (x) and exponent (xexp).
x = vec_andc(x, sgnmask);
x = _frexpf4(x, &xexp);
// Initial estimate p = (-0.1915...*x + 0.6975...)*x + 0.4926...
vector float p =
vec_madd(vec_madd(x, __vec_splatsf4(-0.191502161678719066f), __vec_splatsf4(0.697570460207922770f)),
x,
__vec_splatsf4(0.492659620528969547f));
// p3 = p^3
vector float p3 = vec_madd(p, vec_madd(p, p, zeros), zeros);
vector signed int quot = __cbrtf4_calc_quot(xexp);
// mod = xexp - 3*quotient
vector signed int modval = vec_sub(vec_sub(xexp,quot), vec_sl(quot, __vec_splatsu4(1)));
// factor = 2^(modval/3) for modval in {-1, 0, 1, 2}; any other modval keeps
// the initial value 2^(-2/3).
vector float factor = __vec_splatsf4(1.0/__CBRTF_sqr_cbrt2);
factor = vec_sel(factor, __vec_splatsf4(1.0/__CBRTF_cbrt2), vec_cmpeq(modval, __vec_splatsi4(-1)));
factor = vec_sel(factor, __vec_splatsf4( 1.0), vec_cmpeq(modval, __vec_splatsi4( 0)));
factor = vec_sel(factor, __vec_splatsf4( __CBRTF_cbrt2), vec_cmpeq(modval, __vec_splatsi4( 1)));
factor = vec_sel(factor, __vec_splatsf4(__CBRTF_sqr_cbrt2), vec_cmpeq(modval, __vec_splatsi4( 2)));
vector float pre = vec_madd(p, factor, zeros);
// One refinement step: res = pre * (2*x + p^3) / (2*p^3 + x)
// (the same form as Halley's iteration for a cube root).
vector float numr = vec_madd(x , __vec_splatsf4(2.0f), p3);
vector float denr = vec_madd(p3, __vec_splatsf4(2.0f), x );
vector float res = vec_madd(pre, _divf4(numr, denr), zeros);
// Scale by 2^quot, then set the sign bit in the slots that were negative.
res = _ldexpf4(res, quot);
return vec_sel(res, vec_or(res,sgnmask), negmask);
}
#endif

View File

@@ -27,13 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_CEILF4_H___
#define ___SIMD_MATH_CEILF4_H___
#include <simdmath.h>
#include <altivec.h>
// Applies vec_ceil to x: each of the four slots is rounded up to an
// integral value.
vector float
ceilf4 (vector float x)
static inline vector float
_ceilf4(vector float x)
{
return vec_ceil( x );
return vec_ceil( x );
}
#endif

View File

@@ -27,15 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COPYSIGNF4_H___
#define ___SIMD_MATH_COPYSIGNF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// Returns, per slot, the value of x with its sign bit replaced by the sign
// bit of y: vec_sel takes bit 31 (mask 0x80000000) from y and every other
// bit from x.
vector float
copysignf4 (vector float x, vector float y)
static inline vector float
_copysignf4(vector float x, vector float y)
{
return vec_sel( x, y, vec_splatsu4(0x80000000) );
return vec_sel(x, y, __vec_splatsu4(0x80000000));
}
#endif

View File

@@ -0,0 +1,107 @@
/* cosf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_COSF4_H___
#define ___SIMD_MATH_COSF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_sincos.h>
#include <simdmath/_vec_utils.h>
//
// Computes the cosine of each of the four slots
// by using a polynomial approximation.
//
// The angle is reduced to a remainder xl plus a quadrant count q; both the
// sine and cosine polynomials are evaluated on xl and the quadrant decides
// which one is returned and with which sign.
//
static inline vector float
_cosf4 (vector float x)
{
vector float xl,xl2,xl3,res;
vector signed int q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = vec_madd(x, __vec_splatsf4(0.63661977236f), __vec_splatsf4(0.0f));
// Find the quadrant the angle falls in by rounding xl to the nearest
// integer: 0.5 carrying the sign bit of xl is added, then vec_cts
// truncates toward zero.
//
xl = vec_add(xl, vec_sel(__vec_splatsf4(0.5f), xl, __vec_splatsu4(0x80000000)));
q = vec_cts(xl, 0);
// Compute an offset based on the quadrant that the angle falls in:
// offset = 1 + (q & 3), i.e. a value in 1..4.
//
vector signed int offset = vec_add(__vec_splatsi4(1), vec_and(q, __vec_splatsi4(0x3)));
// Remainder in range [-pi/4..pi/4]
// xl = x - q*KC1 - q*KC2; the constants come from _sincos.h
// (presumably a two-part split of pi/2 -- confirm there).
//
vector float qf = vec_ctf(q,0);
vector float p1 = vec_nmsub(qf, __vec_splatsf4(__SINCOSF_KC1), x);
xl = vec_nmsub(qf, __vec_splatsf4(__SINCOSF_KC2), p1);
// Compute x^2 and x^3
//
xl2 = vec_madd(xl, xl, __vec_splatsf4(0.0f));
xl3 = vec_madd(xl2, xl, __vec_splatsf4(0.0f));
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
// sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
//
vector float ct1 = vec_madd(__vec_splatsf4(__SINCOSF_CC0), xl2, __vec_splatsf4(__SINCOSF_CC1));
vector float st1 = vec_madd(__vec_splatsf4(__SINCOSF_SC0), xl2, __vec_splatsf4(__SINCOSF_SC1));
vector float ct2 = vec_madd(ct1, xl2, __vec_splatsf4(__SINCOSF_CC2));
vector float st2 = vec_madd(st1, xl2, __vec_splatsf4(__SINCOSF_SC2));
vector float cx = vec_madd(ct2, xl2, __vec_splatsf4(1.0f));
vector float sx = vec_madd(st2, xl3, xl);
// Use the cosine when the offset is odd and the sin
// when the offset is even
// (mask1 is set where bit 0 of offset is clear, and selects sx).
//
vector unsigned int mask1 =
(vector unsigned int)vec_cmpeq(vec_and(offset, __vec_splatsi4(0x1)), __vec_splatsi4(0));
res = vec_sel(cx, sx, mask1);
// Flip the sign of the result when bit 1 of offset is set, i.e. when
// (offset mod 4) = 2 or 3. mask2 is set where that bit is clear and
// keeps res unchanged there; elsewhere the sign-flipped value is taken.
//
vector unsigned int mask2 =
(vector unsigned int)vec_cmpeq(vec_and(offset, __vec_splatsi4(0x2)), __vec_splatsi4(0));
res = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000U), (vector unsigned int)res), res, mask2);
return res;
}
#endif

View File

@@ -27,21 +27,25 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVF4_H___
#define ___SIMD_MATH_DIVF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// Approximates numer / denom in each of the four slots:
// y0 = vec_re(denom) (reciprocal estimate)
// y0numer = numer * y0
// result = y0numer + (1 - denom * y0) * y0numer
// No special handling of a zero denominator is done here.
vector float
divf4 (vector float numer, vector float denom)
static inline vector float
_divf4 (vector float numer, vector float denom)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Reciprocal estimate and 1 Newton-Raphson iteration.
vector float y0, y0numer;
vector float y0, y0numer;
y0 = vec_re( denom );
y0numer = vec_madd( numer, y0, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) );
return vec_madd( vec_nmsub( denom, y0, ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f}) ), y0numer, y0numer );
y0 = vec_re(denom);
y0numer = vec_madd(numer, y0, __vec_splatsf4(0.0f));
return vec_madd(vec_nmsub(denom, y0, __vec_splatsf4(1.0f)), y0numer, y0numer);
}
#endif

View File

@@ -27,63 +27,64 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_DIVI4_H___
#define ___SIMD_MATH_DIVI4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// divi4 - for each of four integer slots, compute quotient and remainder of numer/denom
// and store in divi4_t struct. Divide by zero produces quotient = 0, remainder = numerator.
divi4_t
divi4 (vec_int4 numer, vec_int4 denom )
static inline divi4_t
_divi4 (vector signed int numer, vector signed int denom )
{
vec_int4 minusone = vec_splatsi4(-1);
vec_uint4 zero = vec_splatsu4(0);
vec_uint4 one = vec_splatsu4(1);
vec_uint4 k158 = vec_splatsu4(158);
vec_uint4 k23 = vec_splatsu4(23);
vector signed int minusone = __vec_splatsi4(-1);
vector unsigned int zero = __vec_splatsu4(0);
vector unsigned int one = __vec_splatsu4(1);
vector unsigned int k158 = __vec_splatsu4(158);
vector unsigned int k23 = __vec_splatsu4(23);
divi4_t res;
vec_uint4 numerPos, denomPos, quotNeg;
vec_uint4 numerAbs, denomAbs;
vec_uint4 denomZeros, numerZeros, shift, denomShifted, oneShifted;
vec_uint4 quot, newQuot, skip, newNum, cont;
int anyCont;
divi4_t res;
vector unsigned int numerPos, denomPos, quotNeg;
vector unsigned int numerAbs, denomAbs;
vector unsigned int denomZeros, numerZeros, shift, denomShifted, oneShifted;
vector unsigned int quot, newQuot, skip, newNum, cont;
int anyCont;
// determine whether result needs sign change
// determine whether result needs sign change
numerPos = (vec_uint4)vec_cmpgt( numer, minusone );
denomPos = (vec_uint4)vec_cmpgt( denom, minusone );
quotNeg = vec_xor( numerPos, denomPos );
numerPos = (vector unsigned int)vec_cmpgt( numer, minusone );
denomPos = (vector unsigned int)vec_cmpgt( denom, minusone );
quotNeg = vec_xor( numerPos, denomPos );
// use absolute values of numerator, denominator
// use absolute values of numerator, denominator
numerAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, numer ), numer, numerPos );
denomAbs = (vec_uint4)vec_sel( vec_sub( (vec_int4)zero, denom ), denom, denomPos );
numerAbs = (vector unsigned int)vec_sel( vec_sub( (vector signed int)zero, numer ), numer, numerPos );
denomAbs = (vector unsigned int)vec_sel( vec_sub( (vector signed int)zero, denom ), denom, denomPos );
// get difference of leading zeros to align denom with numer
// get difference of leading zeros to align denom with numer
denomZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( denomAbs, 0 ), k23 ) );
numerZeros = vec_sub( k158, vec_sr( (vec_uint4)vec_ctf( numerAbs, 0 ), k23 ) );
denomZeros = vec_sub( k158, vec_sr( (vector unsigned int)vec_ctf( denomAbs, 0 ), k23 ) );
numerZeros = vec_sub( k158, vec_sr( (vector unsigned int)vec_ctf( numerAbs, 0 ), k23 ) );
shift = vec_sub( denomZeros, numerZeros );
denomShifted = vec_sl( denomAbs, shift );
oneShifted = vec_sl( one, shift );
oneShifted = vec_sel( oneShifted, zero, vec_or( vec_cmpeq( denomAbs, zero ),
vec_cmpgt( denomAbs, numerAbs ) ) );
shift = vec_sub( denomZeros, numerZeros );
denomShifted = vec_sl( denomAbs, shift );
oneShifted = vec_sl( one, shift );
oneShifted = vec_sel( oneShifted, zero, vec_or( vec_cmpeq( denomAbs, zero ),
vec_cmpgt( denomAbs, numerAbs ) ) );
// long division
// long division
quot = zero;
quot = zero;
do
{
cont = (vec_uint4)vec_cmpgt( oneShifted, zero );
do
{
cont = (vector unsigned int)vec_cmpgt( oneShifted, zero );
anyCont = vec_any_gt( oneShifted, zero );
skip = (vec_uint4)vec_cmpgt( denomShifted, numerAbs );
skip = (vector unsigned int)vec_cmpgt( denomShifted, numerAbs );
newQuot = vec_or( quot, oneShifted );
newNum = vec_sub( numerAbs, denomShifted );
@@ -93,11 +94,12 @@ divi4 (vec_int4 numer, vec_int4 denom )
quot = vec_sel( newQuot, quot, skip );
numerAbs = vec_sel( numerAbs, newNum, vec_andc( cont, skip ) );
}
while ( anyCont );
}
while ( anyCont );
res.quot = (vec_int4)vec_sel( quot, vec_sub( zero, quot ), quotNeg );
res.rem = (vec_int4)vec_sel( (vec_uint4)vec_sub( (vec_int4)zero, (vec_int4)numerAbs ), numerAbs, numerPos );
return res;
res.quot = (vector signed int)vec_sel( quot, vec_sub( zero, quot ), quotNeg );
res.rem = (vector signed int)vec_sel( (vector unsigned int)vec_sub( (vector signed int)zero, (vector signed int)numerAbs ), numerAbs, numerPos );
return res;
}
#endif

View File

@@ -27,15 +27,18 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXP2F4_H___
#define ___SIMD_MATH_EXP2F4_H___
#include <simdmath.h>
#include <altivec.h>
#include <math.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
/*
* FUNCTION
* vec_float4 _exp2_v(vec_float4 x)
* vector float _exp2_v(vector float x)
*
* DESCRIPTION
* _exp2_v computes 2 raised to the input vector x. Computation is
@@ -73,41 +76,37 @@
*/
#define _EXP2F_H_LN2 0.69314718055995f /* ln(2) */
#define __EXP2F_LN2 0.69314718055995f /* ln(2) */
vector float
exp2f4 (vector float x)
static inline vector float
_exp2f4 (vector float x)
{
vec_int4 ix;
vec_uint4 overflow;
vec_uint4 underflow;
vec_float4 frac, frac2, frac4;
vec_float4 exp_int, exp_frac;
vec_float4 result;
vec_float4 hi, lo;
vec_float4 zeros = vec_splatsf4(0.0f);
vec_float4 bias;
vector signed int ix;
vector unsigned int overflow;
vector unsigned int underflow;
vector float frac, frac2, frac4;
vector float exp_int, exp_frac;
vector float result;
vector float hi, lo;
vector float zeros = __vec_splatsf4(0.0f);
vector float bias;
/* Break in the input x into two parts ceil(x), x - ceil(x).
*/
#if 1
bias = (vec_float4)(vec_sra((vec_int4)x, vec_splatsu4(31) ));
bias = (vec_float4)(vec_andc(vec_splatsu4(0x3F7FFFFF), (vec_uint4)bias));
bias = (vector float)(vec_sra((vector signed int)x, __vec_splatsu4(31) ));
bias = (vector float)(vec_andc(__vec_splatsu4(0x3F7FFFFF), (vector unsigned int)bias));
ix = vec_cts(vec_add(x, bias), 0);
#else
bias = vec_sel(vec_floor(x), vec_ceil(x), vec_cmpgt(x, vec_splatsf4(0.0f)));
bias = vec_sel(vec_floor(x), vec_ceil(x), vec_cmpgt(x, __vec_splatsf4(0.0f)));
ix = vec_cts(bias, 0);
#endif
frac = vec_sub(vec_ctf(ix, 0), x);
frac = vec_madd(frac, vec_splatsf4(_EXP2F_H_LN2), zeros);
frac = vec_madd(frac, __vec_splatsf4(__EXP2F_LN2), zeros);
// !!! HRD Changing weird un-understandable and incorrect overflow handling code
//overflow = vec_sel((vec_uint4)(0x7FFFFFFF), (vec_uint4)x, (vec_uint4)(0x80000000) );
overflow = (vec_uint4)vec_cmpgt(x, (vec_float4)(vec_splatsi4(0x4300FFFF))); // !!! Biggest possible exponent to fit in range.
underflow = (vec_uint4)vec_cmpgt(vec_splatsf4(-126.0f), x);
overflow = (vector unsigned int)vec_cmpgt(x, (vector float)(__vec_splatsi4(0x4300FFFF))); // !!! Biggest possible exponent to fit in range.
underflow = (vector unsigned int)vec_cmpgt(__vec_splatsf4(-126.0f), x);
//exp_int = (vec_float4)(vec_sl(vec_add(ix, (vec_int4)(127)), (vec_uint4)(23))); // !!! HRD <- changing this to correct for
// !!! overflow (x >= 127.999999f)
exp_int = (vec_float4)(vec_sl(vec_add(ix, vec_splatsi4(126)), vec_splatsu4(23))); // !!! HRD <- add with saturation
exp_int = (vector float)(vec_sl(vec_add(ix, __vec_splatsi4(126)), __vec_splatsu4(23))); // !!! HRD <- add with saturation
/* Instruction counts can be reduced if the polynomial was
* computed entirely from nested (dependent) fma's. However,
@@ -117,22 +116,22 @@ exp2f4 (vector float x)
frac2 = vec_madd(frac, frac, zeros);
frac4 = vec_madd(frac2, frac2, zeros);
hi = vec_madd(frac, vec_splatsf4(-0.0001413161), vec_splatsf4(0.0013298820));
hi = vec_madd(frac, hi, vec_splatsf4(-0.0083013598));
hi = vec_madd(frac, hi, vec_splatsf4(0.0416573475));
lo = vec_madd(frac, vec_splatsf4(-0.1666653019), vec_splatsf4(0.4999999206));
lo = vec_madd(frac, lo, vec_splatsf4(-0.9999999995));
lo = vec_madd(frac, lo, vec_splatsf4(1.0));
hi = vec_madd(frac, __vec_splatsf4(-0.0001413161), __vec_splatsf4(0.0013298820));
hi = vec_madd(frac, hi, __vec_splatsf4(-0.0083013598));
hi = vec_madd(frac, hi, __vec_splatsf4(0.0416573475));
lo = vec_madd(frac, __vec_splatsf4(-0.1666653019), __vec_splatsf4(0.4999999206));
lo = vec_madd(frac, lo, __vec_splatsf4(-0.9999999995));
lo = vec_madd(frac, lo, __vec_splatsf4(1.0));
exp_frac = vec_madd(frac4, hi, lo);
//ix = vec_add(ix, vec_sr((vec_int4)(exp_frac), (vec_uint4)(23) ));
result = vec_madd(exp_frac, exp_int, zeros);
result = vec_madd(exp_frac, exp_int, result); // !!! HRD
/* Handle overflow */
result = vec_sel(result, vec_splatsf4(HUGE_VALF), overflow);
result = vec_sel(result, __vec_splatsf4(HUGE_VALF), overflow);
result = vec_sel(result, zeros, underflow);
//result = vec_sel(result, (vec_float4)(overflow), vec_cmpgt((vec_uint4)(ix), (vec_uint4)(255)));
return (result);
}
#endif

View File

@@ -27,39 +27,43 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPF4_H___
#define ___SIMD_MATH_EXPF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/divf4.h>
#include <simdmath/ldexpf4.h>
#define _EXPF_H_C1 ((float)-0.6931470632553101f)
#define _EXPF_H_C2 ((float)-1.1730463525082e-7f)
#define __EXPF_C1 -0.6931470632553101f
#define __EXPF_C2 -1.1730463525082e-7f
#define _EXPF_H_INVLN2 ((float)1.4426950408889634f)
#define __EXPF_INVLN2 1.4426950408889634f
// Computes e^x in each of the four slots.
//
// Steps, per slot:
// xexp = x * (1/ln 2) rounded to the nearest integer (0.5 with the sign
// of x is added, then vec_cts truncates)
// g = x + xexp*C1 + xexp*C2, the reduced argument; C1 + C2 is -ln 2
// split into two constants
// z = g*g, a = 0.0999748594*z, b = g*(0.0083208258*z + 0.4999999992)
// result = ldexp((1 + a + b) / (1 + a - b), xexp)
vector float
expf4 (vector float x)
static inline vector float
_expf4 (vector float x)
{
vec_float4 zeros = vec_splatsf4(0.0f);
vec_uint4 xnegmask = (vec_uint4)vec_cmpgt(zeros, x);
vec_float4 goffset = vec_sel(vec_splatsf4( 0.5f),vec_splatsf4(-0.5f),xnegmask);
vec_float4 g = vec_madd(x, vec_splatsf4(_EXPF_H_INVLN2), zeros);
vec_int4 xexp = vec_cts(vec_add(g, goffset),0);
vector float zeros = __vec_splatsf4(0.0f);
vector unsigned int xnegmask = (vector unsigned int)vec_cmpgt(zeros, x);
vector float goffset = vec_sel(__vec_splatsf4( 0.5f),__vec_splatsf4(-0.5f),xnegmask);
vector float g = vec_madd(x, __vec_splatsf4(__EXPF_INVLN2), zeros);
vector signed int xexp = vec_cts(vec_add(g, goffset),0);
g = vec_ctf(xexp, 0);
g = vec_madd(g, vec_splatsf4(_EXPF_H_C2), vec_madd(g, vec_splatsf4(_EXPF_H_C1), x));
vec_float4 z = vec_madd(g, g, zeros);
vec_float4 a = vec_madd(z, vec_splatsf4(0.0999748594f), zeros);
vec_float4 b = vec_madd(g,
vec_madd(z,
vec_splatsf4(0.0083208258f),
vec_splatsf4(0.4999999992f)
),
zeros);
g = vec_madd(g, __vec_splatsf4(__EXPF_C2), vec_madd(g, __vec_splatsf4(__EXPF_C1), x));
vector float z = vec_madd(g, g, zeros);
vector float a = vec_madd(z, __vec_splatsf4(0.0999748594f), zeros);
vector float b = vec_madd(g,
vec_madd(z,
__vec_splatsf4(0.0083208258f),
__vec_splatsf4(0.4999999992f)),
zeros);
vec_float4 foo = divf4(vec_add(vec_splatsf4(1.0f), vec_add(a, b)),
vec_add(vec_splatsf4(1.0f), vec_sub(a, b)));
return ldexpf4(foo, xexp);
vector float foo = _divf4(vec_add(__vec_splatsf4(1.0f), vec_add(a, b)),
vec_add(__vec_splatsf4(1.0f), vec_sub(a, b)));
return _ldexpf4(foo, xexp);
}
#endif

View File

@@ -27,31 +27,37 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_EXPM1F4_H___
#define ___SIMD_MATH_EXPM1F4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
#define _EXPM1F_H_ln1by2 ((float)-0.6931471805599f)
#define _EXPM1F_H_ln3by2 ((float) 0.4054651081082f)
#define __EXPM1F_ln1by2 -0.6931471805599f
#define __EXPM1F_ln3by2 0.4054651081082f
// Computes e^x - 1 in each of the four slots.
//
// Slots with ln(1/2) < x < ln(3/2) (nearzeromask) use the rational
// approximation x * n1 / d1, where
// d1 = 0.0326527809*x^2 - 0.3203561199*x + 0.9483177697
// n1 = x^2*(0.0024490478*x + 0.0305274668) + 0.1538026623*x + 0.9483177732
// All other slots use exp(x) - 1 directly.
vector float
expm1f4 (vector float x)
static inline vector float
_expm1f4 (vector float x)
{
vec_float4 zeros = vec_splatsf4(0.0f);
vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(_EXPM1F_H_ln1by2)),
vec_cmpgt(vec_splatsf4(_EXPM1F_H_ln3by2), x));
vec_float4 x2 = vec_madd(x,x,zeros);
vec_float4 d0, d1, n0, n1;
vector float zeros = __vec_splatsf4(0.0f);
vector unsigned int nearzeromask =
(vector unsigned int)vec_and(vec_cmpgt(x, __vec_splatsf4(__EXPM1F_ln1by2)),
vec_cmpgt(__vec_splatsf4(__EXPM1F_ln3by2), x));
vector float x2 = vec_madd(x,x,zeros);
vector float d0, d1, n0, n1;
d0 = vec_madd(x , vec_splatsf4(-0.3203561199f), vec_splatsf4(0.9483177697f));
d1 = vec_madd(x2, vec_splatsf4( 0.0326527809f), d0);
d0 = vec_madd(x , __vec_splatsf4(-0.3203561199f), __vec_splatsf4(0.9483177697f));
d1 = vec_madd(x2, __vec_splatsf4( 0.0326527809f), d0);
n0 = vec_madd(x , vec_splatsf4(0.1538026623f), vec_splatsf4(0.9483177732f));
n1 = vec_madd(x , vec_splatsf4(0.0024490478f), vec_splatsf4(0.0305274668f));
n0 = vec_madd(x , __vec_splatsf4(0.1538026623f), __vec_splatsf4(0.9483177732f));
n1 = vec_madd(x , __vec_splatsf4(0.0024490478f), __vec_splatsf4(0.0305274668f));
n1 = vec_madd(x2, n1, n0);
// NOTE(review): the return expression calls expf4() and divf4(), not the
// underscore-prefixed inline forms (_expf4/_divf4) that the other converted
// headers call, and this header includes only <simdmath/_vec_utils.h>.
// Confirm whether _expf4/_divf4 plus the matching includes were intended.
return vec_sel(vec_sub(expf4(x), vec_splatsf4(1.0f)),
return vec_sel(vec_sub(expf4(x), __vec_splatsf4(1.0f)),
vec_madd(x, divf4(n1, d1), zeros),
nearzeromask);
}
#endif

View File

@@ -27,12 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FABSF4_H___
#define ___SIMD_MATH_FABSF4_H___
#include <simdmath.h>
#include <altivec.h>
// Applies vec_abs to x: absolute value of each of the four slots.
vector float fabsf4(vector float x)
static inline vector float
_fabsf4(vector float x)
{
return vec_abs( x );
return vec_abs( x );
}
#endif

View File

@@ -27,13 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FDIMF4_H___
#define ___SIMD_MATH_FDIMF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_vec_utils.h>
// Positive difference per slot: returns x - y where x > y, and 0.0f in
// every slot where the comparison x > y is false.
vector float
fdimf4 (vector float x, vector float y)
static inline vector float
_fdimf4 (vector float x, vector float y)
{
vector float diff = vec_sub(x,y);
return vec_sel(((vector float){0.0f, 0.0f, 0.0f, 0.0f}), diff, vec_cmpgt(x,y));
return vec_sel(__vec_splatsf4(0.0f), diff, vec_cmpgt(x,y));
}
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FLOORF4_H___
#define ___SIMD_MATH_FLOORF4_H___
#include <simdmath.h>
#include <altivec.h>
// Applies vec_floor to x: each of the four slots is rounded down to an
// integral value.
vector float
floorf4 (vector float x)
static inline vector float
_floorf4 (vector float x)
{
return vec_floor( x );
return vec_floor( x );
}
#endif

View File

@@ -27,11 +27,16 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAF4_H___
#define ___SIMD_MATH_FMAF4_H___
#include <simdmath.h>
#include <altivec.h>
// Multiply-add per slot: returns x * y + z, computed with vec_madd.
vector float
fmaf4 (vector float x, vector float y, vector float z)
static inline vector float
_fmaf4 (vector float x, vector float y, vector float z)
{
return vec_madd(x,y,z);
}
#endif

View File

@@ -27,14 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMAXF4_H___
#define ___SIMD_MATH_FMAXF4_H___
#include <simdmath.h>
#include <altivec.h>
// Per-slot maximum of x and y, computed with vec_max.
vector float
fmaxf4 (vector float x, vector float y)
static inline vector float
_fmaxf4 (vector float x, vector float y)
{
return vec_max( x, y );
return vec_max( x, y );
}
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMINF4_H___
#define ___SIMD_MATH_FMINF4_H___
#include <simdmath.h>
#include <altivec.h>
// Per-slot minimum of x and y, computed with vec_min.
vector float
fminf4 (vector float x, vector float y)
static inline vector float
_fminf4 (vector float x, vector float y)
{
return vec_min( x, y );
return vec_min( x, y );
}
#endif

View File

@@ -27,56 +27,62 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FMODF4_H___
#define ___SIMD_MATH_FMODF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/divf4.h>
#include <simdmath/fabsf4.h>
#include <simdmath/copysignf4.h>
//
// Computes the floating-point remainder of x / y in each of the four slots
// as x - i*y, where i is the truncated quotient carrying the sign of x/y.
//
// This returns an accurate result when |divf4(x,y)| < 2^20 and |x| < 2^128, and otherwise returns zero.
// If x == 0, the result is 0.
// If x != 0 and y == 0, the result is undefined.
vector float
fmodf4 (vector float x, vector float y)
static inline vector float
_fmodf4 (vector float x, vector float y)
{
vec_float4 q, xabs, yabs, qabs, xabs2;
vec_int4 qi0, qi1, qi2;
vec_float4 i0, i1, i2, r1, r2, i;
vec_uint4 inrange;
vector float q, xabs, yabs, qabs, xabs2;
vector signed int qi0, qi1, qi2;
vector float i0, i1, i2, r1, r2, i;
vector unsigned int inrange;
// Find i = truncated_integer(|x/y|)
// Find i = truncated_integer(|x/y|)
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
// If |divf4(x,y)| < 2^20, the quotient is at most off by 1.0.
// Thus i is either the truncated quotient, one less, or one greater.
q = divf4( x, y );
xabs = fabsf4( x );
yabs = fabsf4( y );
qabs = fabsf4( q );
xabs2 = vec_add( xabs, xabs );
q = _divf4( x, y );
xabs = _fabsf4( x );
yabs = _fabsf4( y );
qabs = _fabsf4( q );
xabs2 = vec_add( xabs, xabs );
// NOTE(review): xabs2 is assigned but not read again in this function.
// inrange marks slots where |q| < 2^20 (0x49800000 is the bit pattern of 2^20).
inrange = (vec_uint4)vec_cmpgt( (vec_float4)(vec_splatsu4(0x49800000)), qabs );
inrange = (vector unsigned int)vec_cmpgt( (vector float)(__vec_splatsu4(0x49800000)), qabs );
// Candidate quotients: truncated |q| (qi1), one less (qi0), one more (qi2).
qi1 = vec_cts( qabs, 0 );
qi0 = vec_add( qi1, ((vec_int4){-1, -1, -1, -1}) );
qi2 = vec_add( qi1, ((vec_int4){1, 1, 1, 1}) );
qi1 = vec_cts( qabs, 0 );
qi0 = vec_add( qi1, __vec_splatsi4(-1) );
qi2 = vec_add( qi1, __vec_splatsi4(1) );
i0 = vec_ctf( qi0, 0 );
i1 = vec_ctf( qi1, 0 );
i2 = vec_ctf( qi2, 0 );
i0 = vec_ctf( qi0, 0 );
i1 = vec_ctf( qi1, 0 );
i2 = vec_ctf( qi2, 0 );
// Correct i will be the largest one such that |x| - i*|y| >= 0.
// Correct i will be the largest one such that |x| - i*|y| >= 0.
r1 = vec_nmsub( i1, yabs, xabs );
r2 = vec_nmsub( i2, yabs, xabs );
r1 = vec_nmsub( i1, yabs, xabs );
r2 = vec_nmsub( i2, yabs, xabs );
// The residuals are compared as integers against 0, i.e. by their sign bit:
// a candidate is accepted only if its residual is not negative.
i = i0;
i = vec_sel( i1, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r1 ) );
i = vec_sel( i2, i, vec_cmpgt( vec_splatsi4(0), (vec_int4)r2 ) );
i = i0;
i = vec_sel( i1, i, vec_cmpgt( __vec_splatsi4(0), (vector signed int)r1 ) );
i = vec_sel( i2, i, vec_cmpgt( __vec_splatsi4(0), (vector signed int)r2 ) );
// Give i the sign of the quotient q.
i = copysignf4( i, q );
i = _copysignf4( i, q );
// x - i*y where the quotient was in range, 0.0f elsewhere.
return vec_sel( vec_splatsf4(0.0f), vec_nmsub( i, y, x ), inrange );
return vec_sel( __vec_splatsf4(0.0f), vec_nmsub( i, y, x ), inrange );
}
#endif

View File

@@ -27,31 +27,28 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_FREXPF4_H___
#define ___SIMD_MATH_FREXPF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// Splits each slot of x into a fraction and a power-of-two exponent.
//
// *exp receives (biased exponent field of x) - 126, or 0 where x == 0.
// The return value is x with its exponent field replaced by that of 0.5f
// (bit pattern 0x3F000000), sign and mantissa bits kept; slots where
// x == 0 return 0.
// Only the exponent field is inspected; there is no separate handling of
// denormal, infinite or NaN inputs in this function.
vector float
frexpf4 (vector float x, vector signed int *exp)
static inline vector float
_frexpf4 (vector float x, vector signed int *exp)
{
vec_int4 zeros = (vec_int4){0,0,0,0};
vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros);
vector signed int zeros = __vec_splatsi4(0);
vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(x, (vector float)zeros);
vec_int4 expmask = vec_splatsi4(0x7F800000);
vec_int4 e1 = vec_and ( (vec_int4)x, expmask);
vec_int4 e2 = vec_sub(vec_sr(e1, vec_splatsu4(23) ), vec_splatsi4(126) );
vector signed int expmask = __vec_splatsi4(0x7F800000);
vector signed int e1 = vec_and ( (vector signed int)x, expmask);
vector signed int e2 = vec_sub(vec_sr(e1, __vec_splatsu4(23) ), __vec_splatsi4(126) );
*exp = vec_sel(e2, zeros, zeromask);
vec_float4 m2 = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask);
vector float m2 = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask);
return vec_sel(m2, (vec_float4)zeros, zeromask);
return vec_sel(m2, (vector float)zeros, zeromask);
}
/*
{
*exp = ((vec_int4)(0));
return ((vec_float4)(0.0f));
}
*/
#endif

View File

@@ -27,15 +27,20 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_HYPOTF4_H___
#define ___SIMD_MATH_HYPOTF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/sqrtf4.h>
// Computes sqrt(x*x + y*y) in each of the four slots.
// The sum of squares is formed directly with two multiply-adds; no
// rescaling of the inputs is done before squaring.
vector float
hypotf4 (vector float x, vector float y)
static inline vector float
_hypotf4 (vector float x, vector float y)
{
vec_float4 sum = vec_madd(x,x, ((vec_float4){0.0f, 0.0f, 0.0f, 0.0f}) );
sum = vec_madd(y,y,sum);
return sqrtf4(sum);
vector float sum = vec_madd( x, x, __vec_splatsf4(0.0f) );
sum = vec_madd(y, y, sum);
return _sqrtf4(sum);
}
#endif

View File

@@ -27,21 +27,26 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_ILOGBF4_H___
#define ___SIMD_MATH_ILOGBF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <math.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// Returns, per slot, the unbiased binary exponent of x as an integer:
// (exponent field of x) - 127.
// Slots whose exponent field is all zero return FP_ILOGB0; that covers
// x == 0 and any other input with a zero exponent field. There is no
// separate branch for infinities or NaNs.
vector signed int
ilogbf4 (vector float x)
static inline vector signed int
_ilogbf4 (vector float x)
{
vec_int4 minus127 = vec_splatsi4(-127);
vector signed int minus127 = __vec_splatsi4(-127);
vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000));
vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0));
vec_int4 e2 = vec_add(vec_sr(e1,vec_splatsu4(23)), minus127);
vector signed int e1 = vec_and((vector signed int)x, __vec_splatsi4(0x7F800000));
vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(e1, __vec_splatsi4(0));
vector signed int e2 = vec_add(vec_sr(e1,__vec_splatsu4(23)), minus127);
return vec_sel(e2, vec_splatsi4(FP_ILOGB0), zeromask);
return vec_sel(e2, __vec_splatsi4(FP_ILOGB0), zeromask);
}
#endif

View File

@@ -0,0 +1,63 @@
/* ldexpf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LDEXPF4_H___
#define ___SIMD_MATH_LDEXPF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_vec_utils.h>
/*
 * _ldexpf4 - for each of the four slots, scale x by a power of two by adding
 * the matching slot of exp to the exponent field of x.
 *
 * x    four single-precision inputs.
 * exp  four signed exponent adjustments.
 *
 * Returns x with its exponent field replaced by (old biased exponent + exp).
 * Slots are then forced to all-zero bits when x compares equal to zero, when
 * exp is below -255, or when the summed exponent is negative. Finally, slots
 * where exp or the summed exponent is above 255 have their low 31 bits set to
 * 0x7FFFFFFF; the sign bit is left as the earlier steps produced it.
 */
static inline vector float
_ldexpf4 (vector float x, vector signed int exp)
{
  vector signed int zero_i    = __vec_splatsi4(0);
  vector signed int exp_field = __vec_splatsi4(0x7F800000);
  vector signed int exp_max   = __vec_splatsi4(255);

  // Biased exponent of x moved down to the low bits, then adjusted by exp.
  vector signed int biased = vec_sr(vec_and((vector signed int)x, exp_field),
                                    __vec_splatsu4(23));
  vector signed int scaled = vec_add(biased, exp);

  // Overflow: either the request itself or the resulting exponent is > 255.
  // The mask is trimmed to 31 bits so that it doubles as the saturated value.
  vector unsigned int too_big =
    vec_or((vector unsigned int)vec_cmpgt(exp, exp_max),
           (vector unsigned int)vec_cmpgt(scaled, exp_max));
  too_big = vec_and(too_big, __vec_splatsu4(0x7FFFFFFF));

  // Underflow / zero input: request below -255, x == 0, or negative exponent.
  vector unsigned int too_small =
    (vector unsigned int)vec_cmpgt(__vec_splatsi4(-255), exp);
  too_small = vec_or(too_small,
                     (vector unsigned int)vec_cmpeq(x, (vector float)zero_i));
  too_small = vec_or(too_small,
                     (vector unsigned int)vec_cmpgt(zero_i, scaled));

  // Splice the new exponent into x, then apply the zero and saturation
  // overrides in that order (saturation wins when both masks are set).
  vector float spliced = vec_sel(x,
                                 (vector float)vec_sl(scaled, __vec_splatsu4(23)),
                                 (vector unsigned int)exp_field);
  vector float flushed = vec_sel(spliced, (vector float)zero_i, too_small);

  return vec_sel(flushed, (vector float)too_big, too_big);
}
#endif

View File

@@ -0,0 +1,83 @@
/* log10f4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG10F4_H___
#define ___SIMD_MATH_LOG10F4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/divf4.h>
#define __LOG10F_loga2msb 0.3010299205780f
#define __LOG10F_loga2lsb 7.5085978266e-8f
#define __LOG10F_logaemsb 0.4342944622040f
#define __LOG10F_logaelsb 1.9699272335e-8f
#define __LOG10F_neglogae -0.4342944819033f
#define __LOG10F_c0 -0.2988439998f
#define __LOG10F_c1 -0.3997655209f
#define __LOG10F_c2 -0.6666679125f
/*
 * _log10f4 - base-10 logarithm of each of the four float slots of x.
 *
 * The input is split into an integer exponent and a fraction: the exponent
 * field is overwritten so the fraction lies in [0.5, 1), and fractions below
 * 0.7071067811865 are doubled (with the exponent decremented to compensate).
 * The logarithm of the fraction is built from z = (f - 1) / (f + 1) and a
 * short polynomial in z*z, and the result is assembled from the
 * __LOG10F_* constants, which are applied as separate "msb" and "lsb" terms.
 *
 * No special-casing of zero, negative, infinite or NaN inputs is done in
 * this function.
 */
static inline vector float
_log10f4 (vector float x)
{
  vector float       fzero     = (vector float)__vec_splatsi4(0);
  vector float       fone      = __vec_splatsf4(1.0f);
  vector signed int  exp_field = __vec_splatsi4(0x7F800000);

  // Unbiased exponent relative to a fraction in [0.5, 1).
  vector signed int e =
    vec_add(vec_sr(vec_and((vector signed int)x, exp_field), __vec_splatsu4(23)),
            __vec_splatsi4(-126));

  // Force the exponent field so the fraction lands in [0.5, 1).
  vector float f = vec_sel(x,
                           (vector float)(__vec_splatsi4(0x3F000000)),
                           (vector unsigned int)exp_field);

  // Fractions below 1/sqrt(2) are doubled and the exponent reduced by one.
  vector unsigned int small =
    (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.7071067811865f), f);
  f = vec_sel(f, vec_add(f, f), small);
  e = vec_sel(e, vec_sub(e, __vec_splatsi4(1)), small);

  vector float fm1 = vec_sub(f, fone);
  vector float z   = _divf4(fm1, vec_add(f, fone));
  vector float zsq = vec_madd(z, z, fzero);

  // Horner evaluation: (c0 * zsq + c1) * zsq + c2.
  vector float poly = vec_madd(__vec_splatsf4(__LOG10F_c0), zsq,
                               __vec_splatsf4(__LOG10F_c1));
  poly = vec_madd(poly, zsq, __vec_splatsf4(__LOG10F_c2));

  vector float corr = vec_madd(z, vec_madd(poly, zsq, fm1), fzero);
  vector float ef   = vec_ctf(e, 0);

  // Leading terms first, then the small correction terms; summed at the end.
  vector float hi = vec_madd(__vec_splatsf4(__LOG10F_logaemsb), fm1,
                             vec_madd(__vec_splatsf4(__LOG10F_loga2msb), ef, fzero));
  vector float lo = vec_madd(__vec_splatsf4(__LOG10F_logaelsb), fm1,
                             vec_madd(__vec_splatsf4(__LOG10F_loga2lsb), ef,
                                      vec_madd(__vec_splatsf4(__LOG10F_neglogae), corr, fzero)));

  return vec_add(hi, lo);
}
#endif

View File

@@ -27,28 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG1PF4_H___
#define ___SIMD_MATH_LOG1PF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/logf4.h>
#include <simdmath/divf4.h>
vector float
log1pf4 (vector float x)
static inline vector float
_log1pf4 (vector float x)
{
vec_uint4 nearzeromask = (vec_uint4)vec_and(vec_cmpgt(x, vec_splatsf4(-0.5f)),
vec_cmpgt(vec_splatsf4(0.5f), x));
vec_float4 x2 = vec_madd(x,x,vec_splatsf4(0.0f));
vec_float4 d0, d1, n0, n1;
vector unsigned int nearzeromask =
(vector unsigned int)vec_and(vec_cmpgt(x, __vec_splatsf4(-0.5f)),
vec_cmpgt(__vec_splatsf4(0.5f), x));
vector float x2 = vec_madd(x,x,__vec_splatsf4(0.0f));
vector float d0, d1, n0, n1;
d0 = vec_madd(x , vec_splatsf4(1.5934420741f), vec_splatsf4(0.8952856868f));
d1 = vec_madd(x , vec_splatsf4(0.1198195734f), vec_splatsf4(0.8377145063f));
d0 = vec_madd(x , __vec_splatsf4(1.5934420741f), __vec_splatsf4(0.8952856868f));
d1 = vec_madd(x , __vec_splatsf4(0.1198195734f), __vec_splatsf4(0.8377145063f));
d1 = vec_madd(x2, d1, d0);
n0 = vec_madd(x , vec_splatsf4(1.1457993413f), vec_splatsf4(0.8952856678f));
n1 = vec_madd(x , vec_splatsf4(0.0082862580f), vec_splatsf4(0.3394238808f));
n0 = vec_madd(x , __vec_splatsf4(1.1457993413f), __vec_splatsf4(0.8952856678f));
n1 = vec_madd(x , __vec_splatsf4(0.0082862580f), __vec_splatsf4(0.3394238808f));
n1 = vec_madd(x2, n1, n0);
return vec_sel(logf4(vec_add(x, vec_splatsf4(1.0f))),
vec_madd(x, divf4(n1, d1), vec_splatsf4(0.0f)),
return vec_sel(_logf4(vec_add(x, __vec_splatsf4(1.0f))),
vec_madd(x, _divf4(n1, d1), __vec_splatsf4(0.0f)),
nearzeromask);
}
#endif

View File

@@ -27,54 +27,51 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOG2F4_H___
#define ___SIMD_MATH_LOG2F4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/divf4.h>
#define _LOG2F_H_l2emsb ((float) 1.4426950216293f)
#define _LOG2F_H_l2elsb ((float) 1.9259629911e-8f)
#define _LOG2F_H_negl2e ((float)-1.4426950408890f)
#define __LOG2F_l2emsb 1.4426950216293f
#define __LOG2F_l2elsb 1.9259629911e-8f
#define __LOG2F_negl2e -1.4426950408890f
#define _LOG2F_H_c0 ((float)(-0.2988439998f))
#define _LOG2F_H_c1 ((float)(-0.3997655209f))
#define _LOG2F_H_c2 ((float)(-0.6666679125f))
#define __LOG2F_c0 -0.2988439998f
#define __LOG2F_c1 -0.3997655209f
#define __LOG2F_c2 -0.6666679125f
vector float
log2f4 (vector float x)
static inline vector float
_log2f4 (vector float x)
{
vec_int4 zeros = vec_splatsi4(0);
vec_float4 ones = vec_splatsf4(1.0f);
//vec_uint4 zeromask = (vec_uint4)vec_cmpeq(x, (vec_float4)zeros);
vector signed int zeros = __vec_splatsi4(0);
vector float ones = __vec_splatsf4(1.0f);
vec_int4 expmask = vec_splatsi4(0x7F800000);
vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) );
x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask);
vector signed int expmask = __vec_splatsi4(0x7F800000);
vector signed int xexp =
vec_add( vec_sr(vec_and((vector signed int)x, expmask), __vec_splatsu4(23)), __vec_splatsi4(-126) );
x = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask);
vec_uint4 mask = (vec_uint4)vec_cmpgt( vec_splatsf4((float)0.7071067811865f), x);
vector unsigned int mask = (vector unsigned int)vec_cmpgt( __vec_splatsf4(0.7071067811865f), x);
x = vec_sel(x , vec_add(x, x) , mask);
xexp = vec_sel(xexp, vec_sub(xexp, vec_splatsi4(1) ), mask);
xexp = vec_sel(xexp, vec_sub(xexp, __vec_splatsi4(1) ), mask);
vec_float4 x1 = vec_sub(x , ones);
vec_float4 z = divf4(x1, vec_add(x, ones));
vec_float4 w = vec_madd(z , z, (vec_float4)zeros);
vec_float4 polywneg;
polywneg = vec_madd(vec_splatsf4(_LOG2F_H_c0), w, vec_splatsf4(_LOG2F_H_c1));
polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOG2F_H_c2));
vector float x1 = vec_sub(x , ones);
vector float z = _divf4(x1, vec_add(x, ones));
vector float w = vec_madd(z , z, (vector float)zeros);
vector float polywneg;
polywneg = vec_madd(__vec_splatsf4(__LOG2F_c0), w, __vec_splatsf4(__LOG2F_c1));
polywneg = vec_madd(polywneg , w, __vec_splatsf4(__LOG2F_c2));
vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros);
vec_float4 zz1 = vec_madd(vec_splatsf4(_LOG2F_H_l2emsb), x1, vec_ctf(xexp,0));
vec_float4 zz2 = vec_madd(vec_splatsf4(_LOG2F_H_l2elsb), x1,
vec_madd(vec_splatsf4(_LOG2F_H_negl2e), y, (vec_float4)zeros)
);
vector float y = vec_madd(z, vec_madd(polywneg, w, x1), (vector float)zeros);
vector float zz1 = vec_madd(__vec_splatsf4(__LOG2F_l2emsb), x1, vec_ctf(xexp,0));
vector float zz2 = vec_madd(__vec_splatsf4(__LOG2F_l2elsb), x1,
vec_madd(__vec_splatsf4(__LOG2F_negl2e), y, (vector float)zeros));
//return vec_sel(vec_add(zz1,zz2), (vec_float4)zeromask, zeromask);
return vec_add(zz1,zz2);
}
/*
{
return ((vec_float4)(0.0f));
}
*/
#endif

View File

@@ -27,18 +27,22 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGBF4_H___
#define ___SIMD_MATH_LOGBF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <math.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
vector float
logbf4 (vector float x)
static inline vector float
_logbf4 (vector float x)
{
vec_int4 e1 = vec_and((vec_int4)x, vec_splatsi4(0x7F800000));
vec_uint4 zeromask = (vec_uint4)vec_cmpeq(e1, vec_splatsi4(0));
e1 = vec_sub(e1, vec_splatsi4(0x3F800000));
return vec_sel(vec_ctf(e1,23), vec_splatsf4(-HUGE_VALF), zeromask);
vector signed int e1 = vec_and((vector signed int)x, __vec_splatsi4(0x7F800000));
vector unsigned int zeromask = (vector unsigned int)vec_cmpeq(e1, __vec_splatsi4(0));
e1 = vec_sub(e1, __vec_splatsi4(0x3F800000));
return vec_sel(vec_ctf(e1,23), __vec_splatsf4(-HUGE_VALF), zeromask);
}
#endif

View File

@@ -27,47 +27,51 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_LOGF4_H___
#define ___SIMD_MATH_LOGF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/divf4.h>
#define _LOGF_H_ln2msb ((float)(0.6931470632553f))
#define _LOGF_H_negln2lsb ((float)(-1.1730463525e-7f))
#define __LOGF_ln2msb 0.6931470632553f
#define __LOGF_negln2lsb -1.1730463525e-7f
#define _LOGF_H_c0 ((float)(-0.2988439998f))
#define _LOGF_H_c1 ((float)(-0.3997655209f))
#define _LOGF_H_c2 ((float)(-0.6666679125f))
#define __LOGF_c0 -0.2988439998f
#define __LOGF_c1 -0.3997655209f
#define __LOGF_c2 -0.6666679125f
vector float
logf4 (vector float x)
static inline vector float
_logf4 (vector float x)
{
vec_int4 zeros = vec_splatsi4(0);
vec_float4 ones = vec_splatsf4(1.0f);
//vec_uchar16 zeromask = (vec_uchar16)vec_cmpeq(x, (vec_float4)zeros);
vector signed int zeros = __vec_splatsi4(0);
vector float ones = __vec_splatsf4(1.0f);
vec_int4 expmask = vec_splatsi4(0x7F800000);
vec_int4 xexp = vec_add( vec_sr(vec_and((vec_int4)x, expmask), vec_splatsu4(23)), vec_splatsi4(-126) );
x = vec_sel(x, (vec_float4)(vec_splatsi4(0x3F000000)), (vec_uint4)expmask);
vector signed int expmask = __vec_splatsi4(0x7F800000);
vector signed int xexp =
vec_add( vec_sr(vec_and((vector signed int)x, expmask), __vec_splatsu4(23)), __vec_splatsi4(-126) );
x = vec_sel(x, (vector float)(__vec_splatsi4(0x3F000000)), (vector unsigned int)expmask);
vec_uint4 mask = (vec_uint4)vec_cmpgt(vec_splatsf4((float)0.7071067811865f), x);
vector unsigned int mask = (vector unsigned int)vec_cmpgt(__vec_splatsf4(0.7071067811865f), x);
x = vec_sel(x , vec_add(x, x) , mask);
xexp = vec_sel(xexp, vec_sub(xexp,vec_splatsi4(1)), mask);
xexp = vec_sel(xexp, vec_sub(xexp,__vec_splatsi4(1)), mask);
vec_float4 x1 = vec_sub(x , ones);
vec_float4 z = divf4 (x1, vec_add(x, ones));
vec_float4 w = vec_madd(z , z, (vec_float4)zeros);
vec_float4 polywneg;
polywneg = vec_madd(vec_splatsf4(_LOGF_H_c0), w, vec_splatsf4(_LOGF_H_c1));
polywneg = vec_madd(polywneg , w, vec_splatsf4(_LOGF_H_c2));
vector float x1 = vec_sub(x , ones);
vector float z = _divf4 (x1, vec_add(x, ones));
vector float w = vec_madd(z , z, (vector float)zeros);
vector float polywneg;
polywneg = vec_madd(__vec_splatsf4(__LOGF_c0), w, __vec_splatsf4(__LOGF_c1));
polywneg = vec_madd(polywneg , w, __vec_splatsf4(__LOGF_c2));
vec_float4 y = vec_madd(z, vec_madd(polywneg, w, x1), (vec_float4)zeros);
vec_float4 wnew = vec_ctf(xexp,0);
vec_float4 zz1 = vec_madd(vec_splatsf4(_LOGF_H_ln2msb) , wnew, x1);
vec_float4 zz2neg = vec_madd(vec_splatsf4(_LOGF_H_negln2lsb), wnew, y );
vector float y = vec_madd(z, vec_madd(polywneg, w, x1), (vector float)zeros);
vector float wnew = vec_ctf(xexp,0);
vector float zz1 = vec_madd(__vec_splatsf4(__LOGF_ln2msb) , wnew, x1);
vector float zz2neg = vec_madd(__vec_splatsf4(__LOGF_negln2lsb), wnew, y );
//return vec_sel(vec_sub(zz1,zz2neg), (vec_float4)zeromask, zeromask);
return vec_sub(zz1,zz2neg);
}
#endif

View File

@@ -27,21 +27,27 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_MODFF4_H___
#define ___SIMD_MATH_MODFF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/truncf4.h>
// modff4 - for each of four float slots, compute fractional and integral parts.
// Returns fractional part and stores integral part in *iptr.
vector float
modff4 (vector float x, vector float *iptr)
static inline vector float
_modff4 (vector float x, vector float *iptr)
{
vector float integral, fraction;
vector float integral, fraction;
integral = truncf4( x );
fraction = vec_sub( x, integral );
integral = _truncf4( x );
fraction = vec_sub( x, integral );
*iptr = integral;
return fraction;
*iptr = integral;
return fraction;
}
#endif

View File

@@ -27,14 +27,18 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATEF4_H___
#define ___SIMD_MATH_NEGATEF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
vector float
negatef4 (vector float x)
static inline vector float
_negatef4 (vector float x)
{
return (vector float)vec_xor( (vector unsigned int)x, vec_splatsu4(0x80000000) );
return (vector float)vec_xor( (vector unsigned int)x, __vec_splatsu4(0x80000000) );
}
#endif

View File

@@ -27,13 +27,19 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_NEGATEI4_H___
#define ___SIMD_MATH_NEGATEI4_H___
#include <simdmath.h>
#include <altivec.h>
vector signed int
negatei4 (vector signed int x)
#include <simdmath/_vec_utils.h>
static inline vector signed int
_negatei4 (vector signed int x)
{
vector signed int zero = (vector signed int){0, 0, 0, 0};
vector signed int zero = __vec_splatsi4(0);
return vec_sub (zero, x);
}
#endif

View File

@@ -1,4 +1,4 @@
/* ldexpf4 -
/* powf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
@@ -27,32 +27,35 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_POWF4_H___
#define ___SIMD_MATH_POWF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <math.h>
#include "common-types.h"
#include <simdmath/exp2f4.h>
#include <simdmath/log2f4.h>
vector float
ldexpf4 (vector float x, vector signed int exp)
static inline vector float
_powf4 (vector float x, vector float y)
{
vec_int4 zeros = vec_splatsi4(0);
vector signed int zeros = __vec_splatsi4(0);
vector unsigned int zeromask = (vector unsigned int)vec_cmpeq((vector float)zeros, x);
vec_int4 expmask = vec_splatsi4(0x7F800000);
vec_int4 e1 = vec_and((vec_int4)x, expmask);
vec_int4 e2 = vec_sr(e1,vec_splatsu4(23));
vector unsigned int negmask = (vector unsigned int)vec_cmpgt((vector float)zeros, x);
vector float sbit = (vector float)(__vec_splatsi4(0x80000000));
vector float absx = vec_andc(x, sbit);
vector float absy = vec_andc(y, sbit);
vector unsigned int oddy = vec_and(vec_ctu(absy, 0), __vec_splatsu4(0x00000001));
negmask = vec_and(negmask, (vector unsigned int)vec_cmpgt(oddy, (vector unsigned int)zeros));
vec_uint4 maxmask = (vec_uint4)vec_cmpgt(exp, vec_splatsi4(255));
vec_uint4 minmask = (vec_uint4)vec_cmpgt(vec_splatsi4(-255), exp);
minmask = vec_or (minmask, (vec_uint4)vec_cmpeq(x, (vec_float4)zeros));
vector float res = _exp2f4(vec_madd(y, _log2f4(absx), (vector float)zeros));
res = vec_sel(res, vec_or(sbit, res), negmask);
vec_int4 esum = vec_add(e2, exp);
maxmask = vec_or (maxmask, (vec_uint4)vec_cmpgt(esum, vec_splatsi4(255)));
maxmask = vec_and(maxmask, vec_splatsu4(0x7FFFFFFF));
minmask = vec_or (minmask, (vec_uint4)vec_cmpgt(zeros, esum));
x = vec_sel(x, (vec_float4)vec_sl(esum,vec_splatsu4(23)), (vec_uint4)expmask);
x = vec_sel(x, (vec_float4)zeros, minmask);
x = vec_sel(x, (vec_float4)maxmask, maxmask);
return x;
return vec_sel(res, (vector float)zeros, zeromask);
}
#endif

View File

@@ -27,20 +27,24 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_RECIPF4_H___
#define ___SIMD_MATH_RECIPF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
vector float
recipf4 (vector float x)
static inline vector float
_recipf4 (vector float x)
{
// Reciprocal estimate and 1 Newton-Raphson iteration.
// Reciprocal estimate and 1 Newton-Raphson iteration.
vec_float4 y0;
vec_float4 ones = ((vec_float4){1.0f, 1.0f, 1.0f, 1.0f});
vector float y0;
vector float ones = __vec_splatsf4(1.0f);
y0 = vec_re( x );
return vec_madd( vec_nmsub( x, y0, ones), y0, y0 );
y0 = vec_re( x );
return vec_madd( vec_nmsub( x, y0, ones), y0, y0 );
}
#endif

View File

@@ -27,27 +27,31 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_RSQRTF4_H___
#define ___SIMD_MATH_RSQRTF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "common-types.h"
#include <simdmath/_vec_utils.h>
// rsqrtf4 - for each of four float slots, compute reciprocal square root.
// Undefined if input < 0.
vector float
rsqrtf4 (vector float x)
static inline vector float
_rsqrtf4 (vector float x)
{
// Reciprocal square root estimate and 1 Newton-Raphson iteration.
// Reciprocal square root estimate and 1 Newton-Raphson iteration.
vector float zero = vec_splatsf4(0.0f);
vector float half = vec_splatsf4(0.5f);
vector float one = vec_splatsf4(1.0f);
vector float y0, y0x, y0half;
vector float zero = __vec_splatsf4(0.0f);
vector float half = __vec_splatsf4(0.5f);
vector float one = __vec_splatsf4(1.0f);
vector float y0, y0x, y0half;
y0 = vec_rsqrte( x );
y0x = vec_madd( y0, x, zero );
y0half = vec_madd( y0, half, zero );
return vec_madd( vec_nmsub( y0, y0x, one ), y0half, y0 );
y0 = vec_rsqrte( x );
y0x = vec_madd( y0, x, zero );
y0half = vec_madd( y0, half, zero );
return vec_madd( vec_nmsub( y0, y0x, one ), y0half, y0 );
}
#endif

View File

@@ -0,0 +1,113 @@
/* sincosf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_SINCOSF4_H___
#define ___SIMD_MATH_SINCOSF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_sincos.h>
#include <simdmath/_vec_utils.h>
//
// Computes both the sine and cosine of all four slots of x
// by using a polynomial approximation.
//
/*
 * _sincosf4 - sine and cosine of each of the four float slots of x.
 *
 * x  four angles.
 * s  receives the four sines.
 * c  receives the four cosines.
 *
 * The angle is reduced to a quadrant index plus a remainder, sine and cosine
 * polynomials are evaluated on the remainder, and the quadrant index decides
 * which polynomial and which sign each output slot gets.
 */
static inline void
_sincosf4 (vector float x, vector float *s, vector float *c)
{
  vector float      fzero = __vec_splatsf4(0.0f);
  vector signed int izero = __vec_splatsi4(0);
  vector signed int bit0  = __vec_splatsi4(0x1);
  vector signed int bit1  = __vec_splatsi4(0x2);

  // Scale by 2/pi so that one unit corresponds to one quadrant.
  vector float scaled = vec_madd(x, __vec_splatsf4(0.63661977236f), fzero);

  // Add 0.5 carrying the sign of the scaled angle, then convert to integer:
  // this picks the nearest quadrant index.
  vector float biased =
    vec_add(scaled, vec_sel(__vec_splatsf4(0.5f), scaled, __vec_splatsu4(0x80000000)));
  vector signed int quad = vec_cts(biased, 0);

  // Quadrant modulo 4 for the sine; the cosine uses the same index plus one.
  vector signed int sinIdx = vec_and(quad, __vec_splatsi4(0x3));
  vector signed int cosIdx = vec_add(__vec_splatsi4(1), sinIdx);

  // Remainder r = x - quad * KC1 - quad * KC2, in [-pi/4..pi/4].
  // (KC1/KC2 come from _sincos.h; presumably a two-part split of pi/2.)
  vector float quadf = vec_ctf(quad, 0);
  vector float r = vec_nmsub(quadf, __vec_splatsf4(__SINCOSF_KC2),
                             vec_nmsub(quadf, __vec_splatsf4(__SINCOSF_KC1), x));

  vector float r2 = vec_madd(r, r, fzero);
  vector float r3 = vec_madd(r2, r, fzero);

  // cosR = 1 + r2 * ((CC0 * r2 + CC1) * r2 + CC2)
  vector float cosPoly = vec_madd(__vec_splatsf4(__SINCOSF_CC0), r2,
                                  __vec_splatsf4(__SINCOSF_CC1));
  cosPoly = vec_madd(cosPoly, r2, __vec_splatsf4(__SINCOSF_CC2));
  vector float cosR = vec_madd(cosPoly, r2, __vec_splatsf4(1.0f));

  // sinR = r + r3 * ((SC0 * r2 + SC1) * r2 + SC2)
  vector float sinPoly = vec_madd(__vec_splatsf4(__SINCOSF_SC0), r2,
                                  __vec_splatsf4(__SINCOSF_SC1));
  sinPoly = vec_madd(sinPoly, r2, __vec_splatsf4(__SINCOSF_SC2));
  vector float sinR = vec_madd(sinPoly, r3, r);

  // Even index: take the sine polynomial; odd index: take the cosine one.
  vector unsigned int sinEven =
    (vector unsigned int)vec_cmpeq(vec_and(sinIdx, bit0), izero);
  vector unsigned int cosEven =
    (vector unsigned int)vec_cmpeq(vec_and(cosIdx, bit0), izero);

  // Bit 1 of the index clear (index mod 4 is 0 or 1): keep the sign;
  // otherwise (index mod 4 is 2 or 3) the sign bit is toggled.
  vector unsigned int sinKeep =
    (vector unsigned int)vec_cmpeq(vec_and(sinIdx, bit1), izero);
  vector unsigned int cosKeep =
    (vector unsigned int)vec_cmpeq(vec_and(cosIdx, bit1), izero);

  *s = vec_sel(cosR, sinR, sinEven);
  *c = vec_sel(cosR, sinR, cosEven);

  *s = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000), (vector unsigned int)*s),
               *s, sinKeep);
  *c = vec_sel((vector float)vec_xor(__vec_splatsu4(0x80000000), (vector unsigned int)*c),
               *c, cosKeep);
}
#endif

View File

@@ -0,0 +1,107 @@
/* sinf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_SINF4_H___
#define ___SIMD_MATH_SINF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_sincos.h>
#include <simdmath/_vec_utils.h>
//
// Computes the sine of each of the four slots
// by using a polynomial approximation.
//
/*
 * _sinf4 - sine of each of the four float slots of x.
 *
 * The angle is reduced to a quadrant index plus a remainder, sine and cosine
 * polynomials are evaluated on the remainder, and the quadrant index selects
 * which polynomial and which sign is returned for each slot.
 */
static inline vector float
_sinf4 (vector float x)
{
  vector float      fzero = __vec_splatsf4(0.0f);
  vector signed int izero = __vec_splatsi4((int)(0));

  // Scale by 2/pi so that one unit corresponds to one quadrant.
  vector float scaled = vec_madd(x, __vec_splatsf4(0.63661977236f), fzero);

  // Add 0.5 carrying the sign of the scaled angle, then convert to integer:
  // this picks the nearest quadrant index.
  vector float biased =
    vec_add(scaled, vec_sel(__vec_splatsf4(0.5f), scaled, __vec_splatsu4(0x80000000)));
  vector signed int quad = vec_cts(biased, 0);

  // Quadrant index modulo 4.
  vector signed int idx = vec_and(quad, __vec_splatsi4(0x3));

  // Remainder r = x - quad * KC1 - quad * KC2, in [-pi/4..pi/4].
  // (KC1/KC2 come from _sincos.h; presumably a two-part split of pi/2.)
  vector float quadf = vec_ctf(quad, 0);
  vector float r = vec_nmsub(quadf, __vec_splatsf4(__SINCOSF_KC2),
                             vec_nmsub(quadf, __vec_splatsf4(__SINCOSF_KC1), x));

  vector float r2 = vec_madd(r, r, fzero);
  vector float r3 = vec_madd(r2, r, fzero);

  // cosR = 1 + r2 * ((CC0 * r2 + CC1) * r2 + CC2)
  vector float cosPoly = vec_madd(__vec_splatsf4(__SINCOSF_CC0), r2,
                                  __vec_splatsf4(__SINCOSF_CC1));
  cosPoly = vec_madd(cosPoly, r2, __vec_splatsf4(__SINCOSF_CC2));
  vector float cosR = vec_madd(cosPoly, r2, __vec_splatsf4(1.0f));

  // sinR = r + r3 * ((SC0 * r2 + SC1) * r2 + SC2)
  vector float sinPoly = vec_madd(__vec_splatsf4(__SINCOSF_SC0), r2,
                                  __vec_splatsf4(__SINCOSF_SC1));
  sinPoly = vec_madd(sinPoly, r2, __vec_splatsf4(__SINCOSF_SC2));
  vector float sinR = vec_madd(sinPoly, r3, r);

  // Even index: take the sine polynomial; odd index: take the cosine one.
  vector unsigned int evenIdx =
    (vector unsigned int)vec_cmpeq(vec_and(idx, __vec_splatsi4(0x1)), izero);
  vector float picked = vec_sel(cosR, sinR, evenIdx);

  // Bit 1 of the index clear (index mod 4 is 0 or 1): keep the sign;
  // otherwise (index mod 4 is 2 or 3) the sign bit is toggled.
  vector unsigned int keepSign =
    (vector unsigned int)vec_cmpeq(vec_and(idx, __vec_splatsi4(0x2)), izero);
  vector float flipped =
    (vector float)vec_xor(__vec_splatsu4(0x80000000U), (vector unsigned int)picked);

  return vec_sel(flipped, picked, keepSign);
}
#endif

View File

@@ -27,27 +27,33 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_SQRTF4_H___
#define ___SIMD_MATH_SQRTF4_H___
#include <simdmath.h>
#include <altivec.h>
#include <simdmath/_vec_utils.h>
// sqrtf4 - for each of four float slots, compute square root.
// Undefined if input < 0.
vector float
sqrtf4 (vector float x)
static inline vector float
_sqrtf4 (vector float x)
{
// Reciprocal square root estimate and 1 Newton-Raphson iteration.
// Reciprocal square root estimate and 1 Newton-Raphson iteration.
vector float zero = (vector float){0.0f, 0.0f, 0.0f, 0.0f};
vector float half = (vector float){0.5f, 0.5f, 0.5f, 0.5f};
vector float one = (vector float){1.0f, 1.0f, 1.0f, 1.0f};
vector float y0, y0x, y0xhalf;
vector unsigned int cmp_zero;
vector float zero = __vec_splatsf4(0.0f);
vector float half = __vec_splatsf4(0.5f);
vector float one = __vec_splatsf4(1.0f);
vector float y0, y0x, y0xhalf;
vector unsigned int cmp_zero;
y0 = vec_rsqrte( x );
cmp_zero = (vector unsigned int)vec_cmpeq( x, zero );
y0x = vec_madd( y0, x, zero );
y0xhalf = vec_madd( y0x, half, zero );
return vec_sel( vec_madd( vec_nmsub( y0, y0x, one ), y0xhalf, y0x ), zero, cmp_zero );
y0 = vec_rsqrte( x );
cmp_zero = (vector unsigned int)vec_cmpeq( x, zero );
y0x = vec_madd( y0, x, zero );
y0xhalf = vec_madd( y0x, half, zero );
return vec_sel( vec_madd( vec_nmsub( y0, y0x, one ), y0xhalf, y0x ), zero, cmp_zero );
}
#endif

View File

@@ -27,70 +27,72 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_TANF4_H___
#define ___SIMD_MATH_TANF4_H___
#include <simdmath.h>
#include <altivec.h>
#include "sincos_c.h"
#include "common-types.h"
#define _TAN_KC1 1.57079625129f
#define _TAN_KC2 7.54978995489e-8f
#include <simdmath/_sincos.h>
#include <simdmath/divf4.h>
//
// Computes the tangent of all four slots of x by using a polynomial approximation.
//
vector float
tanf4 (vector float x)
static inline vector float
_tanf4 (vector float x)
{
vector float xl,xl2,xl3,res;
vector signed int q;
vector float xl,xl2,xl3,res;
vector signed int q;
// Range reduction using : xl = angle * TwoOverPi;
//
xl = vec_madd(x, vec_splatsf4(0.63661977236f),vec_splatsf4(0.0f));
// Range reduction using : xl = angle * TwoOverPi;
//
xl = vec_madd(x, __vec_splatsf4(0.63661977236f),__vec_splatsf4(0.0f));
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = vec_add(xl,vec_sel(vec_splatsf4(0.5f),xl,vec_splatsu4(0x80000000)));
q = vec_cts(xl,0);
// Find the quadrant the angle falls in
// using: q = (int) (ceil(abs(x))*sign(x))
//
xl = vec_add(xl,vec_sel(__vec_splatsf4(0.5f),xl,__vec_splatsu4(0x80000000)));
q = vec_cts(xl,0);
// Remainder in range [-pi/4..pi/4]
//
vector float qf = vec_ctf(q,0);
vector float p1 = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC1),x);
xl = vec_nmsub(qf,vec_splatsf4(_SINCOS_KC2),p1);
// Remainder in range [-pi/4..pi/4]
//
vector float qf = vec_ctf(q,0);
vector float p1 = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC1),x);
xl = vec_nmsub(qf,__vec_splatsf4(__SINCOSF_KC2),p1);
// Compute x^2 and x^3
//
xl2 = vec_madd(xl,xl,vec_splatsf4(0.0f));
xl3 = vec_madd(xl2,xl,vec_splatsf4(0.0f));
// Compute x^2 and x^3
//
xl2 = vec_madd(xl,xl,__vec_splatsf4(0.0f));
xl3 = vec_madd(xl2,xl,__vec_splatsf4(0.0f));
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + x2 * (C0 * x2 + C1), and
// sx = xl + x3 * S0
//
vector float ct2 = vec_madd(vec_splatsf4( 0.0097099364f),xl2,vec_splatsf4(-0.4291161787f));
// Compute both the sin and cos of the angles
// using a polynomial expression:
// cx = 1.0f + x2 * (C0 * x2 + C1), and
// sx = xl + x3 * S0
//
vector float ct2 = vec_madd(__vec_splatsf4( 0.0097099364f),xl2,__vec_splatsf4(-0.4291161787f));
vector float cx = vec_madd(ct2,xl2,vec_splatsf4(1.0f));
vector float sx = vec_madd(vec_splatsf4(-0.0957822992f),xl3,xl);
vector float cx = vec_madd(ct2,xl2,__vec_splatsf4(1.0f));
vector float sx = vec_madd(__vec_splatsf4(-0.0957822992f),xl3,xl);
// Compute both cx/sx and sx/cx
//
vector float cxosx = divf4(cx,sx);
vector float sxocx = divf4(sx,cx);
// Compute both cx/sx and sx/cx
//
vector float cxosx = _divf4(cx,sx);
vector float sxocx = _divf4(sx,cx);
vector float ncxosx = (vector float)vec_xor(vec_splatsu4(0x80000000),(vector unsigned int)cxosx);
vector float ncxosx = (vector float)vec_xor(__vec_splatsu4(0x80000000),(vector unsigned int)cxosx);
// For odd numbered quadrants return -cx/sx , otherwise return
// sx/cx
//
vector unsigned int mask = (vector unsigned int)vec_cmpeq(vec_and(q,vec_splatsi4(0x1)),vec_splatsi4(0));
res = vec_sel(ncxosx,sxocx,mask);
// For odd numbered quadrants return -cx/sx , otherwise return
// sx/cx
//
vector unsigned int mask =
(vector unsigned int)vec_cmpeq(vec_and(q,__vec_splatsi4(0x1)),__vec_splatsi4(0));
res = vec_sel(ncxosx,sxocx,mask);
return res;
return res;
}
#endif

View File

@@ -27,13 +27,17 @@
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ___SIMD_MATH_TRUNCF4_H___
#define ___SIMD_MATH_TRUNCF4_H___
#include <simdmath.h>
#include <altivec.h>
vector float
truncf4 (vector float x)
static inline vector float
_truncf4 (vector float x)
{
return vec_trunc( x );
return vec_trunc( x );
}
#endif

View File

@@ -1,96 +0,0 @@
/* Common constants for Sin/Cos/Tan
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __SINCOS_C2__
#define __SINCOS_C2__
//
// Common constants used to evaluate sind2/cosd2/tand2
//
#define _SINCOS_CC0D 0.00000000206374484196
#define _SINCOS_CC1D -0.00000027555365134677
#define _SINCOS_CC2D 0.00002480157946764225
#define _SINCOS_CC3D -0.00138888888730525966
#define _SINCOS_CC4D 0.04166666666651986722
#define _SINCOS_CC5D -0.49999999999999547304
#define _SINCOS_SC0D 0.00000000015893606014
#define _SINCOS_SC1D -0.00000002505069049138
#define _SINCOS_SC2D 0.00000275573131527032
#define _SINCOS_SC3D -0.00019841269827816117
#define _SINCOS_SC4D 0.00833333333331908278
#define _SINCOS_SC5D -0.16666666666666612594
#define _SINCOS_KC1D (13176794.0 / 8388608.0)
#define _SINCOS_KC2D 7.5497899548918821691639751442098584e-8
//
// Common constants used to evaluate sinf4/cosf4/tanf4
//
#define _SINCOS_CC0 -0.0013602249f
#define _SINCOS_CC1 0.0416566950f
#define _SINCOS_CC2 -0.4999990225f
#define _SINCOS_SC0 -0.0001950727f
#define _SINCOS_SC1 0.0083320758f
#define _SINCOS_SC2 -0.1666665247f
#define _SINCOS_KC1 1.57079625129f
#define _SINCOS_KC2 7.54978995489e-8f
//
// Common constants used to evaluate sinf4est/cosf4est
//
#define _SINCOS_R1 -0.1666665668f
#define _SINCOS_R2 0.8333025139e-2f
#define _SINCOS_R3 -0.1980741872e-3f
#define _SINCOS_R4 0.2601903036e-5f
#define _SINCOS_C1 (201.0f/64.0f)
#define _SINCOS_C2 9.67653589793e-4f
// Common constants used to evaluate scalar sinf/cosf.
// Negative coefficients are parenthesized so they stay a single operand
// wherever the macro is pasted.
#define _SIN_C1 (-0.35950439e-4f)
#define _SIN_C2 0.2490001007e-2f
#define _SIN_C3 (-0.8074543253e-1f)
#define _SIN_C4 0.7853981633f
#define _COS_C1 (-0.31872783e-3f)
#define _COS_C2 0.1584968416e-1f
#define _COS_C3 (-0.30842416558f)
#define _COS_C4 0.9999999673f
// POW2(x): square of x. Both the argument and the whole expansion are
// parenthesized so that POW2(a + b) and n / POW2(x) mean what they say.
// NOTE: x is evaluated twice; do not pass expressions with side effects.
#define POW2(x) ((x) * (x))
// SPOLY(x) / CPOLY(x): odd / even polynomials in x built from the
// _SIN_C* / _COS_C* coefficients above.
// The products are written out as "* (x) * (x)" instead of "* POW2(x)" on
// purpose: it keeps the left-to-right multiplication order of the previous
// (unparenthesized) definitions, so results for plain arguments are unchanged
// while compound arguments and surrounding operators are now handled safely.
// NOTE: x is evaluated several times; do not pass expressions with side effects.
#define SPOLY(x) (((((_SIN_C1 * (x) * (x) + _SIN_C2) * (x) * (x) + _SIN_C3) * (x) * (x) + _SIN_C4) * (x)))
#define CPOLY(x) ((((_COS_C1 * (x) * (x) + _COS_C2) * (x) * (x) + _COS_C3) * (x) * (x) + _COS_C4))
#define M_PI 3.141592653589793f
#endif

View File

@@ -1,106 +0,0 @@
/* sincosf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "sincos_c.h"
#include "common-types.h"
//
// sincosf4: sine and cosine of every slot of x, evaluated together so the
// range reduction and the two polynomial evaluations are shared.
//   x - input angles, one per slot
//   s - receives the sine of each slot
//   c - receives the cosine of each slot
//
void sincosf4 (vector float x, vector float *s, vector float *c)
{
    const vec_float4 fZero    = vec_splatsf4(0.0f);
    const vec_int4   iZero    = vec_splatsi4(0);
    const vec_int4   lowBit   = vec_splatsi4(0x1);
    const vec_int4   highBit  = vec_splatsi4(0x2);
    const vec_uint4  signMask = vec_splatsu4(0x80000000);

    vec_float4 scaled, red, red2, red3;
    vec_int4   quad;
    vec_int4   sinQuad, cosQuad;

    // Range reduction: scale the angle by 2/pi.
    scaled = vec_madd(x, vec_splatsf4(0.63661977236f), fZero);

    // Quadrant index: add 0.5 carrying the sign bit of the scaled angle,
    // then convert to a signed integer.
    scaled = vec_add(scaled, vec_sel(vec_splatsf4(0.5f), scaled, signMask));
    quad   = vec_cts(scaled, 0);

    // Quadrant (mod 4) used for the sine; the cosine uses the next one.
    sinQuad = vec_and(quad, vec_splatsi4((int)0x3));
    cosQuad = vec_add(lowBit, sinQuad);

    // Subtract quad * (KC1 + KC2) from x in two steps to obtain the
    // remainder (range [-pi/4..pi/4]).
    vec_float4 quadF = vec_ctf(quad, 0);
    vec_float4 part  = vec_nmsub(quadF, vec_splatsf4(_SINCOS_KC1), x);
    red = vec_nmsub(quadF, vec_splatsf4(_SINCOS_KC2), part);

    // Powers of the remainder.
    red2 = vec_madd(red, red, fZero);
    red3 = vec_madd(red2, red, fZero);

    // Polynomial evaluation:
    //   cosPoly = 1.0f + red2 * ((CC0 * red2 + CC1) * red2 + CC2)
    //   sinPoly = red  + red3 * ((SC0 * red2 + SC1) * red2 + SC2)
    vec_float4 cosA = vec_madd(vec_splatsf4(_SINCOS_CC0), red2, vec_splatsf4(_SINCOS_CC1));
    vec_float4 sinA = vec_madd(vec_splatsf4(_SINCOS_SC0), red2, vec_splatsf4(_SINCOS_SC1));
    vec_float4 cosB = vec_madd(cosA, red2, vec_splatsf4(_SINCOS_CC2));
    vec_float4 sinB = vec_madd(sinA, red2, vec_splatsf4(_SINCOS_SC2));
    vec_float4 cosPoly = vec_madd(cosB, red2, vec_splatsf4(1.0f));
    vec_float4 sinPoly = vec_madd(sinB, red3, red);

    // Even quadrant index -> sine polynomial, odd -> cosine polynomial.
    vec_uint4 evenSin = (vec_uint4)vec_cmpeq(vec_and(sinQuad, lowBit), iZero);
    vec_uint4 evenCos = (vec_uint4)vec_cmpeq(vec_and(cosQuad, lowBit), iZero);
    *s = vec_sel(cosPoly, sinPoly, evenSin);
    *c = vec_sel(cosPoly, sinPoly, evenCos);

    // Negate the result wherever bit 1 of the quadrant index is set.
    evenSin = (vec_uint4)vec_cmpeq(vec_and(sinQuad, highBit), iZero);
    evenCos = (vec_uint4)vec_cmpeq(vec_and(cosQuad, highBit), iZero);
    *s = vec_sel((vec_float4)vec_xor(signMask, (vec_uint4)*s), *s, evenSin);
    *c = vec_sel((vec_float4)vec_xor(signMask, (vec_uint4)*c), *c, evenCos);
}

View File

@@ -1,103 +0,0 @@
/* sinf4 -
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <simdmath.h>
#include <altivec.h>
#include "sincos_c.h"
#include "common-types.h"
//
// sinf4: sine of each of the four slots of x, using range reduction to a
// quadrant followed by a polynomial approximation.
//   x       - input angles, one per slot
//   returns - the sine of each slot
//
vector float
sinf4 (vector float x)
{
    const vec_float4 fZero = vec_splatsf4(0.0f);

    vec_float4 scaled, red, red2, red3, result;
    vec_int4   quad;

    // Range reduction: scale the angle by 2/pi.
    scaled = vec_madd(x, vec_splatsf4(0.63661977236f), fZero);

    // Quadrant index: add 0.5 carrying the sign bit of the scaled angle,
    // then convert to a signed integer.
    scaled = vec_add(scaled, vec_sel(vec_splatsf4(0.5f), scaled, vec_splatsu4(0x80000000)));
    quad   = vec_cts(scaled, 0);

    // Quadrant index reduced mod 4.
    vec_int4 quadMod4 = vec_and(quad, vec_splatsi4((int)0x3));

    // Subtract quad * (KC1 + KC2) from x in two steps to obtain the
    // remainder (range [-pi/4..pi/4]).
    vec_float4 quadF = vec_ctf(quad, 0);
    vec_float4 part  = vec_nmsub(quadF, vec_splatsf4(_SINCOS_KC1), x);
    red = vec_nmsub(quadF, vec_splatsf4(_SINCOS_KC2), part);

    // Powers of the remainder.
    red2 = vec_madd(red, red, fZero);
    red3 = vec_madd(red2, red, fZero);

    // Polynomial evaluation:
    //   cosPoly = 1.0f + red2 * ((CC0 * red2 + CC1) * red2 + CC2)
    //   sinPoly = red  + red3 * ((SC0 * red2 + SC1) * red2 + SC2)
    vec_float4 cosA = vec_madd(vec_splatsf4(_SINCOS_CC0), red2, vec_splatsf4(_SINCOS_CC1));
    vec_float4 sinA = vec_madd(vec_splatsf4(_SINCOS_SC0), red2, vec_splatsf4(_SINCOS_SC1));
    vec_float4 cosB = vec_madd(cosA, red2, vec_splatsf4(_SINCOS_CC2));
    vec_float4 sinB = vec_madd(sinA, red2, vec_splatsf4(_SINCOS_SC2));
    vec_float4 cosPoly = vec_madd(cosB, red2, vec_splatsf4(1.0f));
    vec_float4 sinPoly = vec_madd(sinB, red3, red);

    // Even quadrant index -> sine polynomial, odd -> cosine polynomial.
    vec_uint4 isEven = (vec_uint4)vec_cmpeq(vec_and(quadMod4, vec_splatsi4(0x1)),
                                            vec_splatsi4((int)(0)));
    result = vec_sel(cosPoly, sinPoly, isEven);

    // Negate the result wherever bit 1 of the quadrant index is set.
    vec_uint4 keepSign = (vec_uint4)vec_cmpeq(vec_and(quadMod4, vec_splatsi4(0x2)),
                                              vec_splatsi4((int)0));
    result = vec_sel((vec_float4)vec_xor(vec_splatsu4(0x80000000U), (vec_uint4)result),
                     result, keepSign);

    return result;
}

View File

@@ -35,10 +35,10 @@ STATIC_TESTS = $(TESTS)
SHARED_TESTS = $(TESTS:=.shared)
ALL_TESTS = $(STATIC_TESTS) $(SHARED_TESTS)
INCLUDES_PPU = -I../../
INCLUDES_PPU = -I../../common
ARCH_PPU = 64
CROSS_PPU = ppu-
ARCH_PPU = 32
CROSS_PPU =
AR_PPU = $(CROSS_PPU)ar
CC_PPU = $(CROSS_PPU)gcc
CXX_PPU = $(CROSS_PPU)g++
@@ -111,21 +111,8 @@ shared_check:
../$(SHARED_LIB):
cd ../;$(MAKE) $(MAKE_DEFS) $(SHARED_LIB)
%.o: %.c common-test.h testutils.h
%.o: %.c ../../common/common-test.h testutils.h
$(CC_PPU) $(CFLAGS_PPU) -c $<
#----------
# C++
#----------
%.o: %.C
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cpp
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cc
$(CXX_PPU) $(CFLAGS_PPU) -c $<
%.o: %.cxx
$(CXX_PPU) $(CFLAGS_PPU) -c $<

View File

@@ -1,198 +0,0 @@
/* Header file for common parts of the testsuite
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
// clock: returns the value read by a single "mftb" instruction, used by the
// testsuite as a timestamp.
// NOTE(review): this shares its name with the standard <time.h> clock();
// this header only includes <stdio.h>, but confirm no test includes <time.h>.
static inline unsigned long long clock()
{
unsigned long long ret;
/* This needs to be fixed for the hardware errata. */
// "memory" clobber keeps the compiler from moving memory accesses across the read.
__asm __volatile__ ( "mftb %0\n"
: "=r" (ret)
:
: "memory");
return (ret);
}
// Test files begin with TEST_SET_START("your initials","test set description")
// Individual tests begin with TEST_START("name of test")
// and end with TEST_PASS(), TEST_FAIL("reason for failure") or TEST_CHECK(<test to evaluate>)
// Or you can run a test encapsulated in a function with:
// TEST_FUNCTION("name of test", function(), "reason for failure")
//
// The clock starts when you call TEST_START and stops with TEST_PASS, TEST_FAIL or TEST_CHECK
// After a start there can be several PASS, FAIL or CHECK calls, each one counts as a test, time is measured from the prior call
//
// Global state shared by the TEST_* macros below.
// NOTE(review): these are non-static definitions in a header, so including
// this header from more than one translation unit of the same program would
// define them more than once - confirm each test is a single source file.
char
*__initials, // Test owner's initials
*__description, // short descriptive name for this test set
*__name, // name of the currently running test
*__set_id; // id of the test set
int
// __zip=0,
__success=1, // set to 0 if any tests failed
__count, // Total number of tests run
__passed; // Total number of tests passed
unsigned long long
__ttemp, // scratch: timestamp read at the end of a test
__time, // For timing tests (usually start time of last test)
__ttime; // Cumulative test runtime NOT counting runtime of the TEST macros
// TEST_SET_START
// Opens a set of related tests: records the identifying strings, clears the
// pass/run counters and the timers, and prints the "start of set" banner line.
// set_id - unique test set identifier: a time in the format yyyymmddhhmmss followed by your initials ie: 20040716104615GAC
// initials - your initials
// description - brief descriptive name for this test set
#define TEST_SET_START(set_id,initials,description) \
  do { \
    __set_id=set_id; \
    __initials=initials; \
    __description=description; \
    __count=0;  __passed=0; \
    __time=0;   __ttime=0; \
    printf("0\t%s\t%d\t%s\tSTART\tpassed\ttotal\ttime\t%s\tunique test id \t%s\n", \
           __FILE__, __LINE__, __initials, __set_id, __description); \
  } while(0)
// TEST_START
// Begins a test, and starts the clock
// name - brief name for this test
// The timestamp is read into __time twice; the second read overwrites the
// first, so the recorded start time is taken after __name has been stored.
#define TEST_START(name) \
do { \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
__name=name; \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
} while(0)
// TEST_PASS
// Indicates the test passed
// test_id - unique test ID number, same format as the set_id number
// This should match the id provided to the matching TEST_FAIL call
// Sequence: read the end timestamp first, turn __time into the elapsed
// time, add it to the cumulative __ttime, bump both counters, print the PASS
// line, then re-read the timestamp into __time so the printf is not charged
// to the next measurement.
#define TEST_PASS(test_id) \
do { \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \
__time=__ttemp-__time; \
__ttime+=__time; \
__count++; \
__passed++; \
printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
} while(0)
// TEST_FAIL
// Indicates the test failed
// test_id - unique test ID number, same format as the set_id number
// This should match the id provided to the matching TEST_PASS call
// why - brief description of why it failed
// error_code - integer printed (with %d) at the end of the FAIL line
// Same timing sequence as TEST_PASS, except that __passed is left alone and
// __success is cleared.
#define TEST_FAIL(test_id,why,error_code) \
do { \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \
__time=__ttemp-__time; \
__ttime+=__time; \
__count++; \
__success=0; \
printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,error_code); \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
} while(0)
// TEST_CHECK
// Passes or fails the test after evaluating the "test" argument (just like assert but without terminating the program)
// The clock is immediately stopped so the time required to evaluate "test" will NOT be included in the reported time
// If the test failed, the reason will be printed as FAILED BECAUSE: check (source text of "test") failed
// test_id - unique test ID number, same format as the set_id number
// test - expression evaluating to true/false
// error_code - integer printed (with %d) at the end of the FAIL line
// The clock is restarted after the result line has been printed.
#define TEST_CHECK(test_id,test,error_code) \
do { \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \
__time=__ttemp-__time; \
__ttime+=__time; \
__count++; \
if(test) \
{ \
__passed++; \
printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \
} \
else \
{ \
__success=0; \
printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: check %s failed\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,#test,error_code); \
} \
__asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
} while(0)
// TEST_FUNCTION
// Runs a test encapsulated in a function that returns 0 if the test passed and an error number if it failed
// The clock is started on calling the function and stopped as soon as it returns so the branching logic will not be included in the time
// test_id - unique test ID number, same format as the set_id number
// name - brief name for the test
// func - function invocation (should include parenthesis, may have arguments)
// why - brief description to print if the test fails
// The elapsed time (__time, an unsigned long long) is printed with %lld,
// matching TEST_PASS / TEST_FAIL / TEST_CHECK; printing it with %d would
// mismatch the argument type.
#define TEST_FUNCTION(test_id,name,func,why) \
  do { \
    TEST_START(name); \
    int result=func; \
    __asm __volatile__ ( "mftb %0 \n" : "=r" (__ttemp) :: "memory"); \
    __time=__ttemp-__time; \
    __ttime+=__time; \
    __count++; \
    if(result==0) \
    { \
      __passed++; \
      printf("1\t%s\t%d\t%s\tPASS\t%d\t%d\t%lld\t%s\t%s\t%s\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name); \
    } \
    else \
    { \
      __success=0; \
      printf("1\t%s\t%d\t%s\tFAIL\t%d\t%d\t%lld\t%s\t%s\t%s\tFAILED BECAUSE: %s\t%d\n",__FILE__,__LINE__,__initials,__passed,__count,__time,__set_id,test_id,__name,why,result); \
    } \
    /* restart the clock after printing so the printf is not timed */ \
    __asm __volatile__ ( "mftb %0 \n" : "=r" (__time) :: "memory"); \
  } while(0)
// TEST_SET_DONE
// Closes a set of tests by printing the summary line: "OK" when every test
// that ran also passed, "PROBLEM" otherwise, followed by the passed count,
// the run count and the cumulative test time.
#define TEST_SET_DONE() \
  do { \
    printf("9\t%s\t%d\t%s\t%s\t%d\t%d\t%lld\t%s\tunique test id \t%s\n", \
           __FILE__, __LINE__, __initials, \
           (__count!=__passed) ? "PROBLEM" : "OK", \
           __passed, __count, __ttime, __set_id, __description); \
  } while(0)
// TEST_EXIT
// Call this ONCE at the very end of the test program. It prints "FINISHED!"
// and calls exit() with 0 if all tests passed or -1 if any test failed,
// so the makefile/shell script running the tests can tell which ones failed.
#define TEST_EXIT() \
  do { \
    printf("FINISHED!\n"); \
    exit(__success ? 0 : -1); \
  } while (0)

View File

@@ -1,189 +0,0 @@
/* Header file for common parts of the testsuite
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
All rights reserved.
Redistribution and use in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Sony Computer Entertainment Inc nor the names
of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _FLOATINGPOINT_TESTS_H_
#define _FLOATINGPOINT_TESTS_H_
#if defined(__PPC__)
#include <altivec.h>
#define vec_uchar16 vector unsigned char
#define vec_char16 vector signed char
#define vec_ushort8 vector unsigned short
#define vec_short8 vector signed short
#define vec_uint4 vector unsigned int
#define vec_int4 vector signed int
#define vec_ullong2 vector unsigned long long
#define vec_llong2 vector signed long long
#define vec_float4 vector float
#define vec_double2 vector double
#else
#if __SPU__
#include <spu_intrinsics.h>
#endif
#endif
// To avoid type punning warnings (for printing in hex notation, doing bit-diff etc)
// Each union overlays a floating-point or vector value with integer/byte
// views of the same storage.

// A double viewed as bytes, two 32-bit words, or one 64-bit word.
typedef union {
double d;
unsigned char uc[8];
unsigned int ui[2];
unsigned long long int ull;
} sce_math_alt_double;
// A float viewed as bytes or one 32-bit word.
typedef union {
float f;
unsigned char uc[4];
unsigned int ui;
} sce_math_alt_float;
#if (__PPC__ || __SPU__)
// A vector of four signed ints viewed as an int array.
typedef union {
vec_int4 vsi;
int si[4];
} sce_math_alt_vec_int4;
// A vector of four unsigned ints viewed as an array.
// NOTE(review): the array view is declared as signed int although the vector
// is unsigned - confirm whether callers rely on the signed element type.
typedef union {
vec_uint4 vui;
int ui[4];
} sce_math_alt_vec_uint4;
// A vector of four floats viewed as floats or as raw 32-bit words.
typedef union {
vec_float4 vf;
float sf[4];
unsigned int ui[4];
} sce_math_alt_vec_float4;
#endif
#if __SPU__
// A vector of two doubles viewed as doubles or as raw 64-bit words.
typedef union {
double sd[2];
vec_double2 vd;
unsigned long long int ui[2];
} sce_math_alt_vec_double2;
#endif
#if __PPC__
// PPC (AltiVec) helpers.
// All helpers are "static inline": a plain "inline" definition in a header
// either provides no external definition (C99 and later) or one per including
// translation unit (GNU89), both of which break at link time.

// bitdiff4: per-slot absolute difference between the bit patterns of ref and
// vals, with both reinterpreted as signed 32-bit integers.
static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = vec_sub(refi, valsi);
    vec_int4 negdiff = vec_sub(((vec_int4){0,0,0,0}), diff);
    // keep diff where it is positive, otherwise use its negation
    return vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) ));
}

// bitdiff: scalar form of bitdiff4 - absolute difference between the bit
// patterns of ref and val.
static inline int bitdiff(float ref, float val) {
    sce_math_alt_float aref, aval;
    aref.f = ref;
    aval.f = val;
    int diff = aref.ui - aval.ui;
    return (diff>0)?diff:-diff;
}

// bitmatch4: per slot, 32 minus the ceiling of vec_loge() applied to the
// absolute bit-pattern difference of ref and vals.
static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = vec_sub(refi, valsi);
    vec_int4 negdiff = vec_sub(((vec_int4){0,0,0,0}), diff);
    diff = vec_sel(negdiff, diff, vec_cmpgt(diff, ((vec_int4){0,0,0,0}) ));
    vec_float4 logdiff = vec_loge(vec_ctf(diff,0));
    return vec_sub(((vec_int4){32,32,32,32}), vec_cts(vec_ceil(logdiff),0));
}

// bitmatch: scalar form of bitmatch4; only slot 0 of the temporaries is
// filled in and only slot 0 of the result is returned.
static inline int bitmatch(float ref, float val) {
    sce_math_alt_vec_float4 aref, aval;
    sce_math_alt_vec_int4 adiff;
    aref.sf[0] = ref;
    aval.sf[0] = val;
    adiff.vsi = bitmatch4(aref.vf, aval.vf);
    return adiff.si[0];
}

// extractFloat: returns slot `index` of vf.
static inline float extractFloat(vec_float4 vf, int index)
{
    sce_math_alt_vec_float4 vec;
    vec.vf = vf;
    return vec.sf[index];
}

// extractInt: returns slot `index` of vi.
static inline int extractInt(vec_int4 vi, int index)
{
    sce_math_alt_vec_int4 vec;
    vec.vsi = vi;
    return vec.si[index];
}

// extractUInt: returns slot `index` of vi, as an int (the return type and the
// union's array view are both signed).
static inline int extractUInt(vec_uint4 vi, int index)
{
    sce_math_alt_vec_uint4 vec;
    vec.vui = vi;
    return vec.ui[index];
}
#else
#if __SPU__
// SPU helpers.
// All helpers are "static inline": a plain "inline" definition in a header
// either provides no external definition (C99 and later) or one per including
// translation unit (GNU89), both of which break at link time.

// bitdiff4: per-slot absolute difference between the bit patterns of ref and
// vals, with both reinterpreted as signed 32-bit integers.
static inline vec_int4 bitdiff4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = spu_sub(refi, valsi);
    vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);
    // keep diff where it is positive, otherwise use its negation
    return spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0));
}

// bitdiff: scalar form of bitdiff4, evaluated in slot 0.
static inline int bitdiff(float ref, float val) {
    return spu_extract(bitdiff4(spu_promote(ref,0), spu_promote(val,0)), 0);
}

// bitmatch4: per slot, the number of leading zero bits in the absolute
// bit-pattern difference of ref and vals.
static inline vec_int4 bitmatch4(vec_float4 ref, vec_float4 vals) {
    vec_int4 refi = (vec_int4)ref;
    vec_int4 valsi = (vec_int4)vals;
    vec_int4 diff = spu_sub(refi, valsi);
    vec_int4 negdiff = spu_sub(spu_splats((int)0), diff);
    return (vec_int4)spu_cntlz(spu_sel(negdiff, diff, (vec_uchar16)spu_cmpgt(diff, 0)));
}

// bitmatch: scalar form of bitmatch4, evaluated in slot 0.
static inline int bitmatch(float ref, float val) {
    return spu_extract(bitmatch4(spu_promote(ref,0), spu_promote(val,0)), 0);
}
#else
// bitdiff: absolute difference between the 32-bit patterns of ref and val.
// "static inline" because a plain "inline" definition in a header either
// provides no external definition (C99 and later) or one per including
// translation unit (GNU89), both of which break at link time.
static inline int bitdiff(sce_math_alt_float ref, sce_math_alt_float val) {
    int diff = ref.ui - val.ui;
    return((diff>0)?diff:-diff);
}
// bitmatch: number of leading zero bits in the absolute difference between
// the 32-bit patterns of ref and val (32 when the patterns are identical).
// The loop counts down from 32 once per significant bit of the difference,
// so the counter - not the shifted-out difference, which is always 0 when
// the loop ends - is the result.
// "static inline" because a plain "inline" definition in a header either
// provides no external definition (C99 and later) or one per including
// translation unit (GNU89), both of which break at link time.
static inline int bitmatch(sce_math_alt_float ref, sce_math_alt_float val) {
    int diff, i;
    unsigned int udiff;
    diff = ref.ui - val.ui;
    // negate in unsigned arithmetic so the most negative int is handled too
    udiff = (diff>0) ? (unsigned int)diff : 0u - (unsigned int)diff;
    i = 32;
    while(udiff != 0) {
        i = i-1;
        udiff = udiff >> 1;
    }
    return i;
}
#endif // __SPU__
#endif // __PPC__
#endif // _FLOATINGPOINT_TESTS_H_

View File

@@ -32,7 +32,17 @@
#ifndef _TESTUTILS_H_
#include "floatingpoint_tests.h"
#include <altivec.h>
#define vec_uchar16 vector unsigned char
#define vec_char16 vector signed char
#define vec_ushort8 vector unsigned short
#define vec_short8 vector signed short
#define vec_uint4 vector unsigned int
#define vec_int4 vector signed int
#define vec_ullong2 vector unsigned long long
#define vec_llong2 vector signed long long
#define vec_float4 vector float
#define vec_double2 vector double
extern unsigned int hide_uint( unsigned int x );
extern int hide_int( int x );